group-by with reduce in clojure - clojure

I want to aggregate a large dataset to get something like the result of this SQL query:
SELECT SUM(`profit`) as `profit`, `month` FROM `t` GROUP BY `month`
So I modified Clojure's group-by function like so:
(defn group-reduce
  "Groups the items of `coll` by `(f item)` and folds every group as it goes.

  For each item, `red` is called with the value accumulated so far for the
  item's group (nil for the first item of a group) and the item itself; its
  result becomes the new accumulated value. Returns a persistent map of
  group key -> accumulated value."
  [f red coll]
  (let [step (fn [acc item]
               (let [group-key (f item)
                     so-far    (get acc group-key)]
                 (assoc! acc group-key (red so-far item))))]
    ;; Build the result in a transient map, then freeze it once at the end.
    (-> (reduce step (transient {}) coll)
        persistent!)))
And here is usage:
(group-reduce :month (fn [s x]
(if s
(assoc s :profit (+ (:profit s) (:profit x)))
x))
[{:month 10 :profit 12}
{:month 10 :profit 15}
{:month 12 :profit 1}])
#_=> {10 {:profit 27, :month 10}, 12 {:profit 1, :month 12}}
It works, but maybe there is another way to do this, using clojure standard library?

Closest in the core is merge-with:
(def t [{:month 10 :profit 12}
{:month 10 :profit 15}
{:month 12 :profit 1}])
(apply merge-with + (for [x t] {(:month x) (:profit x)}))
;=> {12 1, 10 27}

Some examples:
user=> (def groups (group-by :month [{:month 10 :profit 12}
#_=> {:month 10 :profit 15}
#_=> {:month 12 :profit 1}])
{10 [{:profit 12, :month 10} {:profit 15, :month 10}], 12 [{:profit 1, :month 12}]}
user=> (for [[k v] groups] {:month k :sum-profit (apply + (map :profit v))})
({:month 10, :sum-profit 27} {:month 12, :sum-profit 1})
user=> (into {} (for [[k v] groups] [k (apply + (map :profit v))]))
{10 27, 12 1}

Related

Clojure: how to move vector elements in a map elegantly

In clojure, I am trying to accomplish the following logic:
Input:
{:a [11 22 33] :b [10 20 30]}, 2
Output:
{:a [11] :b [10 20 30 22 33]}
i.e. Move the last 2 elements from :a to :b
Is there a clojurish way for this operation?
Since you're effectively modifying both mappings in the map, it's probably easiest to explicitly deconstruct the map and just return the new map via a literal, using subvec and into for the vector manipulation:
(defn move
  "Moves the last `n` elements of the vector under :a in `m` to the end of
  the vector under :b. Returns a new map containing only :a and :b.

  Uses subvec, so :a must be a vector and `n` must not exceed its length."
  [m n]
  (let [{src :a dst :b} m
        cut (- (count src) n)]   ; index where the moved tail starts
    {:a (subvec src 0 cut)
     :b (into dst (subvec src cut))}))
(move {:a [11 22 33] :b [10 20 30]} 2)
;;=> {:a [11], :b [10 20 30 22 33]}
As a bonus, this particular implementation is both very idiomatic and very fast.
Alternatively, using the split-at' function from here, you could write it like this:
(defn split-at'
  "Vector-specific split: returns a two-element vector holding the first `n`
  elements of vector `v` and the remaining elements, both as subvectors."
  [n v]
  (let [head (subvec v 0 n)
        tail (subvec v n)]
    [head tail]))
(defn move
  "Moves the last `n` elements of the vector under :a in `m` to the end of
  the vector under :b, splitting :a with split-at'. Returns a new map
  containing only :a and :b."
  [m n]
  (let [{src :a dst :b} m
        keep-count      (- (count src) n)
        [kept moved]    (split-at' keep-count src)]
    {:a kept
     :b (into dst moved)}))
First, using subvec as in the other answers will throw an IndexOutOfBoundsException when the number of elements to be moved is greater than the size of the collection.
Secondly, the destructuring, the way most have done here, couples the function to one specific data structure. This being, a map with keys :a and :b and values for these keys that are vectors. Now if you change one of the keys in the input, then you need to also change it in move function.
My solution follows:
(defn move
  "Moves the last `n` items of `colla` to the end of `collb`.

  Returns a vector `[new-a new-b]` of two vectors: `colla` without its last
  `n` items, and `collb` followed by those items."
  [colla collb n]
  (let [moved (take-last n colla)
        new-b (-> []
                  (into collb)
                  (into moved))
        new-a (into [] (drop-last n colla))]
    [new-a new-b]))
This should work for any collection and will return vector of 2 vectors. My solution is far more reusable. Try:
(move (range 100000) (range 200000) 10000)
Edit:
Now you can use first and second to access the vector you need in the return.
I would do it just a little differently than Josh:
(defn tx-vals
  "Moves the last `num-to-move` items of :a to the end of :b.

  Returns a map with :a and :b; both values are lazy sequences
  (built with drop-last / concat), not vectors."
  [{:keys [a b]} num-to-move]
  (let [kept  (drop-last num-to-move a)
        moved (take-last num-to-move a)]
    {:a kept
     :b (concat b moved)}))
(tx-vals {:a [11 22 33], :b [10 20 30]} 2)
=> {:a (11), :b (10 20 30 22 33)}
Update
Sometimes it may be more convenient to use the clojure.core/split-at function as follows:
(defn tx-vals-2
  "Moves the last `num-to-move` items of :a to the end of :b by splitting :a
  at the number of items to keep.

  Returns a map with :a and :b whose values are lazy sequences."
  [{:keys [a b]} num-to-move]
  (let [num-to-keep (- (count a) num-to-move)
        ;; split-at is defined as [(take n coll) (drop n coll)]
        kept        (take num-to-keep a)
        moved       (drop num-to-keep a)]
    {:a kept
     :b (concat b moved)}))
If vectors are preferred on output (my favorite!), just do:
(defn tx-vals-3
  "Moves the last `num-to-move` items of :a to the end of :b.

  Same splitting approach as a plain split-at on :a, but both :a and :b in
  the returned map are realised as vectors."
  [{:keys [a b]} num-to-move]
  (let [num-to-keep  (- (count a) num-to-move)
        [kept moved] (split-at num-to-keep a)]
    {:a (into [] kept)
     :b (-> []
            (into b)
            (into moved))}))
to get the results:
(tx-vals-2 data 2) => {:a (11), :b (10 20 30 22 33)}
(tx-vals-3 data 2) => {:a [11], :b [10 20 30 22 33]}
(defn f
  "Moves the last `n` items of :a onto the end of :b.

  Returns a map with :a as a fresh vector of the items that stay, and :b as
  the original :b collection with the moved items added via `into`."
  [{:keys [a b]} n]
  (let [keep-count (- (count a) n)
        kept       (into [] (take keep-count a))
        moved      (take-last n a)]
    {:a kept
     :b (into b moved)}))
(f {:a [11 22 33] :b [10 20 30]} 2)
=> {:a [11], :b [10 20 30 22 33]}
In case the order of those items does not matter, here is my attempt:
(def m {:a [11 22 33] :b [10 20 30]})
(defn so-42476918
  "Concatenates :a and :b, then re-splits the combined sequence so that :a
  keeps `(count a) - n` items and :b receives everything else.

  The moved items therefore end up at the FRONT of :b. Both values in the
  returned map are vectors."
  [{:keys [a b]} n]
  (let [keep-count  (- (count a) n)
        [head tail] (split-at keep-count (concat a b))]
    {:a (vec head)
     :b (vec tail)}))
(so-42476918 m 2)
gives:
{:a [11], :b [22 33 10 20 30]}
I would go with an approach that differs a bit from the previous answers (technically it is the same, but it differs at the application-design level).
First of all, transferring data between two collections is quite a frequent task, so it at least deserves some special utility function for that in your library:
(defn transfer
  "Transfers `n` items from the collection `from` to the collection `to`.

  Keyword options:
    :get-from  :start (default) or :end   - which end of `from` to take from
    :put-to    :end   (default) or :start - which end of `to` to add to

  Returns a vector `[remaining-from new-to]`; both elements are sequences,
  not vectors."
  [from to n & {:keys [get-from put-to]
                ;; :or keys must be the bound SYMBOLS, not keywords. With
                ;; keyword keys the defaults are never applied.
                :or   {get-from :start put-to :end}}]
  (let [[remaining moved] (if (= get-from :end)
                            (split-at (- (count from) n) from)
                            ;; Taking from the start: the first n items are
                            ;; the moved ones, so swap the two halves.
                            (reverse (split-at n from)))]
    [remaining (if (= put-to :start)
                 (concat moved to)
                 (concat to moved))]))
ok, it looks verbose, but it lets you transfer data from start/end of one collection to start/end of the other:
user> (transfer [1 2 3] [4 5 6] 2)
[(3) (4 5 6 1 2)]
user> (transfer [1 2 3] [4 5 6] 2 :get-from :end)
[(1) (4 5 6 2 3)]
user> (transfer [1 2 3] [4 5 6] 2 :put-to :start)
[(3) (1 2 4 5 6)]
user> (transfer [1 2 3] [4 5 6] 2 :get-from :end :put-to :start)
[(1) (2 3 4 5 6)]
So what's left, is to make your domain specific function on top of it:
(defn move
  "Moves the last `n` items of (:a data) to the end of (:b data) using
  `transfer`. Returns `data` with :a and :b replaced by the resulting
  vectors; all other keys are left as they were."
  [data n]
  (let [[kept grown] (transfer (:a data) (:b data) n
                               :get-from :end
                               :put-to :end)]
    (-> data
        (assoc :a (vec kept))
        (assoc :b (vec grown)))))
user> (move {:a [1 2 3 4 5] :b [10 20 30 40] :c [:x :y]} 3)
{:a [1 2], :b [10 20 30 40 3 4 5], :c [:x :y]}

use #() instead of (fn ...) in (sorted-map-by ...)

I would like to translate the inner-function call in the following snippet, to one using the #() macro :
(let [m {:a 3, :b 2, :c 4, :x 9, :y 0, :z 5}]
(into (sorted-map-by (fn [key1 key2]
(compare [(get m key2)]
[(get m key1)]))) m))
I am a little bit confused on how I can accomplish that.
Inside an anonymous function, the arguments are given by %1, %2... so you can use
(let [m {:a 3, :b 2, :c 4, :x 9, :y 0, :z 5}]
(into (sorted-map-by #(compare (get m %2)
(get m %1))) m))
note you don't need to wrap the compared values in a vector.

In clojure, how to reverse a map hierarchy [duplicate]

This question already has answers here:
Turn a hash map inside out in Clojure
(3 answers)
Closed 8 years ago.
In clojure, I have a map that contains for each day, and each fruit, the number of fruits eaten. I would like to "reverse the hierarchy" of the map and to return the same data but with the fruits at the top of the hierarchy.
I will explain by an example:
(map-reverse-hierarchy {:monday {:banana 2 :apple 3}
:tuesday {:banana 5 :orange 2}})
; => {:orange {:tuesday 2},
; :banana {:tuesday 5, :monday 2},
; :apple {:monday 3}}
You could use a list comprehension and some destructuring, like
user=> (->> (for [[day consum] data
#_=> [fruit amount] consum]
#_=> {fruit {day amount}})
#_=> (apply merge-with conj))
{:orange {:tuesday 2}, :banana {:tuesday 5, :monday 2}, :apple {:monday 3}}
user=>
or using a function + mapcat instead:
(defn flip
  "Takes one `[day consumption-map]` entry and returns a sequence of
  single-entry maps `{fruit {day amount}}`, one per fruit in the entry."
  [[day consum]]
  (for [[fruit amount] consum]
    {fruit {day amount}}))
(apply merge-with conj (mapcat flip data))
My solution first transposes the pieces of the nested maps and then merges them all.
The pieces are transposed from {k1 {k2 v}} to {k2 {k1 v}} and then merged with (apply merge-with conj ...):
(defn map-reverse-hierarchy
  "Swaps the two key levels of a map of maps: {k1 {k2 v}} becomes
  {k2 {k1 v}}.

  Each leaf is first turned into a one-entry piece {k2 {k1 v}}; the pieces
  are then combined with merge-with conj. Returns nil for an empty input."
  [mm]
  (->> mm
       (mapcat (fn [[outer-key inner]]
                 (map (fn [[inner-key value]]
                        {inner-key {outer-key value}})
                      inner)))
       (apply merge-with conj)))
Maybe:
(defn map-reverse-hierarchy
  "Swaps the two key levels of a map of maps: {k1 {k2 v}} becomes
  {k2 {k1 v}}.

  Every top-level entry is flipped into its own {k2 {k1 v}} map; the flipped
  maps are then combined with merge-with merge. Returns {} for an empty
  input."
  [m]
  (let [flipped (for [[outer inner] m]
                  (into {} (for [[k v] inner]
                             [k {outer v}])))]
    (if (empty? flipped)
      {}
      (apply merge-with merge flipped))))
(map-reverse-hierarchy {:monday {:banana 2 :apple 3}
:tuesday {:banana 5 :orange 2}})
;{:banana {:monday 2, :tuesday 5}, :apple {:monday 3}, :orange {:tuesday 2}}
I think you'll need some custom function. Use clojure.set/map-invert[1] to swap keys and values in hash-map
[1] http://clojure.github.io/clojure/clojure.set-api.html#clojure.set/map-invert
brute-force solution:
(defn x
  "Brute-force demo: inverts the hard-coded day -> fruit -> count map into
  fruit -> day -> count using two nested reduce-kv passes. A count is added
  to whatever is already stored at [fruit day], with a missing entry
  treated as 0."
  []
  (let [by-day   {:monday {:banana 2 :apple 3}
                  :tuesday {:banana 5 :orange 2}}
        add-day  (fn [acc day fruits]
                   (reduce-kv (fn [inner fruit n]
                                (update-in inner [fruit day] (fnil + 0) n))
                              acc
                              fruits))]
    (reduce-kv add-day {} by-day)))
user> (pprint (x))
{:orange {:tuesday 2},
:banana {:tuesday 5, :monday 2},
:apple {:monday 3}}

How to reduce a nested collection without using mutable state?

Given a nested collection I would like to reduce it to only the k-v pairs which are the form [_ D] where D is an integer. For instance I would like to transform as follows:
; Start with this ...
{:a {:val 1 :val 2} :b {:val 3 :c {:val 4}} :val 5}
; ... end with this
{:val 1, :val 2, :val 3, :val 4, :val 5}
I have written a function using postwalk as follows:
(defn mindwave-values
  "Walks the nested structure `data` and collects every map entry whose
  value is an integer into a single flat map, which is returned.

  Entries are recorded in postwalk order, so when the same key occurs at
  several levels the entry recorded last wins. Relies on `postwalk`
  (clojure.walk) being referred into the current namespace."
  [data]
  (let [values       (atom {})
        integer-walk (fn [x]
                       (if (map? x)
                         ;; doseq returns nil, so a visited map is replaced
                         ;; by nil in the walked copy. That copy is thrown
                         ;; away; only the atom's contents matter.
                         (doseq [[k v] x]
                           (when (integer? v)
                             (swap! values assoc k v)))
                         x))]
    (postwalk integer-walk data)
    ;; Dereference the atom to return the collected map.
    @values))
I am curious if it is possible to do this without using mutable state?
EDIT The original function was not quite correct.
Your example data structure is not a legal map, so I've changed it a bit:
(defn int-vals
  "Returns a sequence of the two-element collections (e.g. map entries)
  found in `x` whose second element is an integer, descending through
  nested maps.

  - a map: the results for all of its entries, concatenated
  - any other two-element collection: itself if its second element is an
    integer, otherwise the results for that second element
  - anything else: nil"
  [x]
  (if (map? x)
    (mapcat int-vals x)
    (when (and (coll? x) (= 2 (count x)))
      (let [v (second x)]
        (if (integer? v)
          [x]
          (int-vals v))))))
user> (int-vals {:a {:x 1 :y 2} :b {:val 3 :c {:val 4}} :val 5})
([:y 2] [:x 1] [:val 4] [:val 3] [:val 5])
Your requirements are a bit vague: you say "collection", but your example contains only maps, so I've just had to guess at what you intended.

How to Increment Values in a Map

I am wrapping my head around state in Clojure. I come from languages where state can be mutated. For example, in Python, I can create a dictionary, put some string => integer pairs inside, and then walk over the dictionary and increment the values.
How would I do this in idiomatic Clojure?
(def my-map {:a 1 :b 2})
(zipmap (keys my-map) (map inc (vals my-map)))
;;=> {:b 3, :a 2}
To update only one value by key:
(update-in my-map [:b] inc) ;;=> {:a 1, :b 3}
Since Clojure 1.7 it's also possible to use update:
(update my-map :b inc)
Just produce a new map and use it:
(def m {:a 3 :b 4})
(apply merge
(map (fn [[k v]] {k (inc v) }) m))
; {:b 5, :a 4}
To update multiple values, you could also take advantage of reduce taking an already filled accumulator, and applying a function on that and every member of the following collection.
=> (reduce (fn [a k] (update-in a k inc)) {:a 1 :b 2 :c 3 :d 4} [[:a] [:c]])
{:a 2, :c 4, :b 2, :d 4}
Be aware of the keys needing to be enclosed in vectors, but you can still do multiple update-ins in nested structures like the original update in.
If you made it a generalized function, you could automatically wrap a vector over a key by testing it with coll?:
(defn multi-update-in
  "Applies `f` (with any extra `args`) via update-in at every location in
  `v`, threading the map `m` through all updates.

  Each element of `v` is either a key path (any collection) or a single
  key, which is wrapped in a one-element vector."
  [m v f & args]
  (letfn [(as-path [p]
            (if (coll? p) p [p]))
          (step [acc p]
            (apply update-in acc (as-path p) f args))]
    (reduce step m v)))
which would allow for single-level/key updates without the need for wrapping the keys in vectors
=> (multi-update-in {:a 1 :b 2 :c 3 :d 4} [:a :c] inc)
{:a 2, :c 4, :b 2, :d 4}
but still be able to do nested updates
(def people
{"keith" {:age 27 :hobby "needlefelting"}
"penelope" {:age 39 :hobby "thaiboxing"}
"brian" {:age 12 :hobby "rocket science"}})
=> (multi-update-in people [["keith" :age] ["brian" :age]] inc)
{"keith" {:age 28, :hobby "needlefelting"},
"penelope" {:age 39, :hobby "thaiboxing"},
"brian" {:age 13, :hobby "rocket science"}}
To slightly improve @Michiel Brokent's answer: this version also works if the key is not already present.
(update my-map :a #(if (nil? %) 1 (inc %)))
I've been toying with the same idea, so I came up with:
(defn remap
  "Returns a function of one argument, a map, that produces a new map with
  the same keys and `f` applied to every value."
  [f]
  (fn [m]
    (into {}
          (for [[k v] m]
            [k (f v)]))))
((remap inc) {:foo 1})
;=> {:foo 2}
or
(def inc-vals (remap inc))
(inc-vals {:foo 1})
;=> {:foo 2}