clojure split collection in chunks of increasing size - clojure

Hi I am a clojure newbie,
I am trying to create a function that splits a collection into chunks of increasing size something along the lines of
(apply #(#(take %) (range 1 n)) col)
where n is the number of chunks
example of expected output:
with n = 4 and col = (range 1 4)
(1) (2 3) (4)
with n = 7 and col = (range 1 7)
(1) (2 3) (4 5 6) (7)

You can use something like this:
(defn partition-inc
"Partition xs at increasing steps of n"
[n xs]
(lazy-seq
(when (seq xs)
(cons (take n xs)
(partition-inc (inc n) (drop n xs))))))
; (println (take 5 (partition-inc 1 (range))))
; → ((0) (1 2) (3 4 5) (6 7 8 9) (10 11 12 13 14))
Or if you want to have more influence, you could alternatively provide
a sequence for the sizes (behaves the same as above, if passed (iterate inc 1) for sizes:
(defn partition-sizes
"Partition xs into chunks given by sizes"
[sizes xs]
(lazy-seq
(when (and (seq sizes) (seq xs))
(let [n (first sizes)]
(cons (take n xs) (partition-sizes (rest sizes) (drop n xs)))))))
; (println (take 5 (partition-sizes (range 1 10 2) (range))))
; → ((0) (1 2 3) (4 5 6 7 8) (9 10 11 12 13 14 15) (16 17 18 19 20 21 22 23 24))

An eager solution would look like
(defn partition-inc [coll]
(loop [rt [], c (seq coll), n 1]
(if (seq c)
(recur (conj rt (take n c)) (drop n c) (inc n))
rt)))

another way would be to employ some clojure sequences functions:
(->> (reductions (fn [[_ x] n] (split-at n x))
[[] (range 1 8)]
(iterate inc 1))
(map first)
rest
(take-while seq))
;;=> ((1) (2 3) (4 5 6) (7))

Yet another approach...
(defn growing-chunks [src]
(->> (range)
(reductions #(drop %2 %1) src)
(take-while seq)
(map-indexed take)
rest))
(growing-chunks [:a :b :c :d :e :f :g :h :i])
;; => ((:a) (:b :c) (:d :e :f) (:g :h :i))

Related

Mysterious Clojure function

I would like to write a clojure function that has the following behaviour :
(take 4 (floyd))
=> '((1) (2 3) (4 5 6) (7 8 9 10))
(take 3 (floyd))
=> '((1) (2 3) (4 5 6))
(take 1 (floyd))
=> '((1)))
I tried using partition and partition-all to validate these tests however i couldn't get the right solution. If you have any idea of how to do it, i would really appreciate a little help. I started using clojure a few weeks ago and still have some issues.
Thanks
Here's another option:
(defn floyd []
(map (fn [lo n] (range lo (+ lo n 1)))
(reductions + 1 (iterate inc 1))
(range)))
(take 5 (floyd))
;=> ((1) (2 3) (4 5 6) (7 8 9 10) (11 12 13 14 15))
This was arrived at based on the observation that you want a series of increasing ranges (the (range) argument to map is used to produce a sequence of increasingly longer ranges), each one starting from almost the triangular number sequence:
(take 5 (reductions + 0 (iterate inc 1)))
;=> (0 1 3 6 10)
If we start that sequence from 1 instead, we get the starting numbers in your desired sequence:
(take 5 (reductions + 1 (iterate inc 1)))
;=> (1 2 4 7 11)
If the + 1 inside the mapped function bothers you, you could do this instead:
(defn floyd []
(map (fn [lo n] (range lo (+ lo n)))
(reductions + 1 (iterate inc 1))
(iterate inc 1)))
it is not possible to solve it with partition / partition-all, since they split your sequence into predefined size chunks.
What you can do, is to employ recursive lazy function for that:
user> (defn floyd []
(letfn [(f [n rng]
(cons (take n rng)
(lazy-seq (f (inc n) (drop n rng)))))]
(f 1 (iterate inc 1))))
#'user/floyd
user> (take 1 (floyd))
;;=> ((1))
user> (take 2 (floyd))
;;=> ((1) (2 3))
user> (take 3 (floyd))
;;=> ((1) (2 3) (4 5 6))
user> (take 4 (floyd))
;;=> ((1) (2 3) (4 5 6) (7 8 9 10))
another variant can use similar approach, but only track chunk-start/chunk-size:
user> (defn floyd []
(letfn [(f [n start]
(cons (range start (+ start n))
(lazy-seq (f (inc n) (+ start n)))))]
(f 1 1)))
another approach is to use clojure's collection operating functions:
user> (defn floyd-2 []
(->> [1 1]
(iterate (fn [[start n]]
[(+ n start) (inc n)]))
(map (fn [[start n]] (range start (+ start n))))))
#'user/floyd-2
user> (take 4 (floyd-2))
;;=> ((1) (2 3) (4 5 6) (7 8 9 10))
user> (take 5 (floyd-2))
;;=> ((1) (2 3) (4 5 6) (7 8 9 10) (11 12 13 14 15))
user> (take 1 (floyd-2))
;;=> ((1))
How about this:
(defn floyd []
(map (fn[n]
(let [start (/ (* n (inc n)) 2)]
(range (inc start) (+ start n 2))))
(iterate inc 0)))
(take 4 (floyd))

How to take n random items from a collection in Clojure?

I have a function that takes n random items from a collection such that the same item is never picked twice. I could accomplish this quite easily:
(defn take-rand [n coll]
(take n (shuffle coll)))
But I have a pesky requirement that I need to return the same random subset when the same seed is provided, i.e.
(defn take-rand [n coll & [seed]] )
(take-rand 5 (range 10) 42L) ;=> (2 5 8 6 7)
(take-rand 5 (range 10) 42L) ;=> (2 5 8 6 7)
(take-rand 5 (range 10) 27L) ;=> (7 6 9 1 3)
(take-rand 5 (range 10)) ;=> (9 7 8 5 0)
I have a solution for this, but it feels a bit clunky and not very idiomatic. Can any Clojure veterans out there propose improvements (or a completely different approach)?
Here's what I did:
(defn take-rand
"Returns n randomly selected items from coll, or all items if there are fewer than n.
No item will be picked more than once."
[n coll & [seed]]
(let [n (min n (count coll))
rng (if seed (java.util.Random. seed) (java.util.Random.))]
(loop [out [], in coll, n n]
(if (or (empty? in) (= n 0))
out
(let [i (.nextInt rng n)]
(recur (conj out (nth in i))
(concat (take i in) (nthrest in (inc i)))
(dec n)))))))
Well, I'm no clojure veteran, but how about:
(defn shuffle-with-seed
"Return a random permutation of coll with a seed"
[coll seed]
(let [al (java.util.ArrayList. coll)
rnd (java.util.Random. seed)]
(java.util.Collections/shuffle al rnd)
(clojure.lang.RT/vector (.toArray al))))
(defn take-rand [n coll & [seed]]
(take n (if seed
(shuffle-with-seed coll seed)
(shuffle coll))))
shuffle-with-seed is similiar to clojure's shuffle, it just passes an instance of Random to Java's java.util.Collections.shuffle().
Replace the shuffle function in your first solution with the reproducible one. I will show you my solution below:
(defn- shuffle'
[seed coll]
(let [rng (java.util.Random. seed)
rnd #(do % (.nextInt rng))]
(sort-by rnd coll)))
(defn take-rand'
([n coll]
(->> coll shuffle (take n)))
([n coll seed]
(->> coll (shuffle' seed) (take n))))
I hope this solution make results you expected:
user> (take-rand' 5 (range 10))
(5 4 7 2 6)
user> (take-rand' 5 (range 10))
(1 9 0 8 5)
user> (take-rand' 5 (range 10))
(5 2 3 1 8)
user> (take-rand' 5 (range 10) 42)
(2 6 4 8 1)
user> (take-rand' 5 (range 10) 42)
(2 6 4 8 1)
user> (take-rand' 5 (range 10) 42)
(2 6 4 8 1)

Partition a seq by a "windowing" predicate in Clojure

I would like to "chunk" a seq into subseqs the same as partition-by, except that the function is not applied to each individual element, but to a range of elements.
So, for example:
(gather (fn [a b] (> (- b a) 2))
[1 4 5 8 9 10 15 20 21])
would result in:
[[1] [4 5] [8 9 10] [15] [20 21]]
Likewise:
(defn f [a b] (> (- b a) 2))
(gather f [1 2 3 4]) ;; => [[1 2 3] [4]]
(gather f [1 2 3 4 5 6 7 8 9]) ;; => [[1 2 3] [4 5 6] [7 8 9]]
The idea is that I apply the start of the list and the next element to the function, and if the function returns true we partition the current head of the list up to that point into a new partition.
I've written this:
(defn gather
[pred? lst]
(loop [acc [] cur [] l lst]
(let [a (first cur)
b (first l)
nxt (conj cur b)
rst (rest l)]
(cond
(empty? l) (conj acc cur)
(empty? cur) (recur acc nxt rst)
((complement pred?) a b) (recur acc nxt rst)
:else (recur (conj acc cur) [b] rst)))))
and it works, but I know there's a simpler way. My question is:
Is there a built in function to do this where this function would be unnecessary? If not, is there a more idiomatic (or simpler) solution that I have overlooked? Something combining reduce and take-while?
Thanks.
Original interpretation of question
We (all) seemed to have misinterpreted your question as wanting to start a new partition whenever the predicate held for consecutive elements.
Yet another, lazy, built on partition-by
(defn partition-between [pred? coll]
(let [switch (reductions not= true (map pred? coll (rest coll)))]
(map (partial map first) (partition-by second (map list coll switch)))))
(partition-between (fn [a b] (> (- b a) 2)) [1 4 5 8 9 10 15 20 21])
;=> ((1) (4 5) (8 9 10) (15) (20 21))
Actual Question
The actual question asks us to start a new partition whenever pred? holds for the beginning of the current partition and the current element. For this we can just rip off partition-by with a few tweaks to its source.
(defn gather [pred? coll]
(lazy-seq
(when-let [s (seq coll)]
(let [fst (first s)
run (cons fst (take-while #((complement pred?) fst %) (next s)))]
(cons run (gather pred? (seq (drop (count run) s))))))))
(gather (fn [a b] (> (- b a) 2)) [1 4 5 8 9 10 15 20 21])
;=> ((1) (4 5) (8 9 10) (15) (20 21))
(gather (fn [a b] (> (- b a) 2)) [1 2 3 4])
;=> ((1 2 3) (4))
(gather (fn [a b] (> (- b a) 2)) [1 2 3 4 5 6 7 8 9])
;=> ((1 2 3) (4 5 6) (7 8 9))
Since you need to have the information from previous or next elements than the one you are currently deciding on, a partition of pairs with a reduce could do the trick in this case.
This is what I came up with after some iterations:
(defn gather [pred s]
(->> (partition 2 1 (repeat nil) s) ; partition the sequence and if necessary
; fill the last partition with nils
(reduce (fn [acc [x :as s]]
(let [n (dec (count acc))
acc (update-in acc [n] conj x)]
(if (apply pred s)
(conj acc [])
acc)))
[[]])))
(gather (fn [a b] (when (and a b) (> (- b a) 2)))
[1 4 5 8 9 10 15 20 21])
;= [[1] [4 5] [8 9 10] [15] [20 21]]
The basic idea is to make partitions of the number of elements the predicate function takes, filling the last partition with nils if necessary. The function then reduces each partition by determining if the predicate is met, if so then the first element in the partition is added to the current group and a new group is created. Since the last partition could have been filled with nulls, the predicate has to be modified.
Tow possible improvements to this function would be to let the user:
Define the value to fill the last partition, so the reducing function could check if any of the elements in the partition is this value.
Specify the arity of the predicate, thus allowing to determine the grouping taking into account the current and the next n elements.
I wrote this some time ago in useful:
(defn partition-between [split? coll]
(lazy-seq
(when-let [[x & more] (seq coll)]
(lazy-loop [items [x], coll more]
(if-let [[x & more] (seq coll)]
(if (split? [(peek items) x])
(cons items (lazy-recur [x] more))
(lazy-recur (conj items x) more))
[items])))))
It uses lazy-loop, which is just a way to write lazy-seq expressions that look like loop/recur, but I hope it's fairly clear.
I linked to a historical version of the function, because later I realized there's a more general function that you can use to implement partition-between, or partition-by, or indeed lots of other sequential functions. These days the implementation is much shorter, but it's less obvious what's going on if you're not familiar with the more general function I called glue:
(defn partition-between [split? coll]
(glue conj []
(fn [v x]
(not (split? [(peek v) x])))
(constantly false)
coll))
Note that both of these solutions are lazy, which at the time I'm writing this is not true of any of the other solutions in this thread.
Here is one way, with steps split up. It can be narrowed down to fewer statements.
(def l [1 4 5 8 9 10 15 20 21])
(defn reduce_fn [f x y]
(cond
(f (last (last x)) y) (conj x [y])
:else (conj (vec (butlast x)) (conj (last x) y)) )
)
(def reduce_fn1 (partial reduce_fn #(> (- %2 %1) 2)))
(reduce reduce_fn1 [[(first l)]] (rest l))
keep-indexed is a wonderful function. Given a function f and a vector lst,
(keep-indexed (fn [idx it] (if (apply f it) idx))
(partition 2 1 lst)))
(0 2 5 6)
this returns the indices after which you want to split. Let's increment them and tack a 0 at the front:
(cons 0 (map inc (.....)))
(0 1 3 6 7)
Partition these to get ranges:
(partition 2 1 nil (....))
((0 1) (1 3) (3 6) (6 7) (7))
Now use these to generate subvecs:
(map (partial apply subvec lst) ....)
([1] [4 5] [8 9 10] [15] [20 21])
Putting it all together:
(defn gather
[f lst]
(let [indices (cons 0 (map inc
(keep-indexed (fn [idx it]
(if (apply f it) idx))
(partition 2 1 lst))))]
(map (partial apply subvec (vec lst))
(partition 2 1 nil indices))))
(gather #(> (- %2 %) 2) '(1 4 5 8 9 10 15 20 21))
([1] [4 5] [8 9 10] [15] [20 21])

Neat way to apply a function to every nth element of a sequence?

What's a neat way to map a function to every nth element in a sequence ? Something like (map-every-nth fn coll n), so that it would return the original sequence with only every nth element transformed, e.g. (map-every-nth inc (range 16) 4) would return (0 1 2 4 4 5 6 8 8 9 10 12 12 13 14 16)
Try this:
(defn map-every-nth [f coll n]
(map-indexed #(if (zero? (mod (inc %1) n)) (f %2) %2) coll))
(map-every-nth inc (range 16) 4)
> (0 1 2 4 4 5 6 8 8 9 10 12 12 13 14 16)
I suggest that this would be simpler and cleaner than the accepted answer:
(defn map-every-nth [f coll n]
(map f (take-nth n coll)))
This is a handy one to know: http://clojuredocs.org/clojure_core/clojure.core/take-nth
I personally like this solution better:
(defn apply-to-last [f col] (concat (butlast col) (list (f (last col)))))
(apply concat (map #(apply-to-last (fn [x] (* 2 x)) %) (partition 4 (range 16))))
Or as a function:
(defn apply-to-last [f col] (concat (butlast col) (list (f (last col)))))
(defn map-every-nth [f col n] (apply concat (map #(apply-to-last f %) (partition n col))))
(map-every-nth (fn [x] (* 2 (inc x))) (range 16) 4)
; output: (0 1 2 8 4 5 6 16 8 9 10 24 12 13 14 32)
Notice this easily leads to the ability to apply-to-first, apply-to-second or apply-to-third giving the ability to control the "start" of mapping every nth element.
I do not know the performance of the code I wrote above, but it does seem more idiomatic to me.

Clojure: finding sequential items from a sequence

In a Clojure program, I have a sequence of numbers:
(2 3 4 6 8 1)
I want to find the longest sub-sequence where the items are sequential:
(2 3 4)
I am assuming that it will involve (take-while ...) or (reduce ...).
Any ideas?
Clarification: I need the longest initial list of sequential items. Much easier, I'm sure. Thanks for the solutions to the more difficult problem I initially posed.
If you are only interested in the longest initial sequence, it's a 1-liner:
(defn longest-initial-sequence [[x :as s]]
(take-while identity (map #(#{%1} %2) s (iterate inc x))))
Taking into account the OP's comment on the question -- which completely changes the game! -- this can be written very simply:
(let [doubletons (partition 2 1 [1 2 3 5 6])
increment? (fn increment? [[x y]]
(== (inc x) y))]
(cons (ffirst doubletons)
(map second (take-while increment? doubletons))))
;; returns (1 2 3)
Note that this is actually lazy. I expect it not to hold onto the head of doubletons thanks to locals clearing. Another version:
(cons (first [1 2 3 5 6])
(map second (take-while increment? (partition 2 1 [1 2 3 5 6]))))
The original version of the question is more fun, though! :-) A super-simple solution to that could be built using the above, but of course that would be significantly less performant than using reduce. I'll see if I have anything substantially different from zmila's and dnolen's solutions -- and yet still reasonably performant -- to add to that part of this thread later. (Not very likely, I guess.)
Answer to original:
(defn conj-if-sequential
([] [])
([a] a)
([a b] (let [a (if (vector? a) a [a])]
(if (= (inc (last a)) b)
(conj a b)
a))))
(reduce conj-if-sequential [2 3 4 6 8 1])
A more generic solution for those interested:
(defn sequential-seqs
([] [])
([a] a)
([a b] (let [n (last (last a))]
(if (and n (= (inc n) b))
(update-in a [(dec (count a))] conj b)
(conj a [b])))))
(defn largest
([] nil)
([a] a)
([a b] (if (> (count b) (count a)) b a)))
(reduce largest (reduce sequential-seqs [] [2 3 4 6 8 1 4 5 6 7 8 9 13]))
I think this is much better.
(defn find-max-seq [lst]
(let [[f & r] lst,
longest-seq (fn [a b] (if (> (count a) (count b)) a b)),
[last-seq max-seq] (reduce
(fn [ [[prev-num & _ :as cur-seq] max-seq] cur-num ]
(if (== (inc prev-num) cur-num)
[(conj cur-seq cur-num) max-seq]
[(list cur-num) (longest-seq cur-seq max-seq)]
))
[(list f) ()]
r)]
(reverse (longest-seq last-seq max-seq))))
(find-max-seq '(2 3 4 6 8 1)) ; ==> (2 3 4)
(find-max-seq '(3 2 3 4 6 8 9 10 11)) ; ==> (8 9 10 11)