lazy-seq and stack overflow for infinite sequences - clojure

I am trying to show the importance of lazy-sequences or lazy-evaluation to the non-FP programmers. I have written this implementation of prime-generation to show the concept:
(defn primes-gen [sieve]
(if-not (empty? sieve)
(let [prime (first sieve)]
(cons prime
(lazy-seq (primes-gen
(filter (fn [x]
(not= 0 (mod x prime)))
(rest sieve))))))))
;;;;; --------- TO SHOW ABOUT THE LAZY-THINGS
;; (take 400 (primes-gen (iterate inc 2)))
;; (take 400 (primes-gen (range 2 1000000000000N)))
However, i get stack-overflow-exception if i give any bigger value to take.
The stack is :
user> (pst)
StackOverflowError
clojure.core/range/fn--4269 (core.clj:2664)
clojure.lang.LazySeq.sval (LazySeq.java:42)
clojure.lang.LazySeq.seq (LazySeq.java:60)
clojure.lang.RT.seq (RT.java:484)
clojure.core/seq (core.clj:133)
clojure.core/filter/fn--4226 (core.clj:2523)
clojure.lang.LazySeq.sval (LazySeq.java:42)
clojure.lang.LazySeq.seq (LazySeq.java:60)
clojure.lang.RT.seq (RT.java:484)
clojure.core/seq (core.clj:133)
It seems that filter thunks are getting accumulated.
But if do (doall (filter ... then i would not be able to process the infinite sequences i.e. (take 1000 (primes-gen (iterate inc 2))) would not work anymore.
What is the right way to do it ?

Your analysis is spot on: you are nesting filters too much.
You should modify prime-gen to take two args: the set of known primes and candidates.
See my blog for some other ideas on implementing the Erathostenes' sieve.
Update:
So you stack filters over filters and at some point the stack is too big when you want to fetch the new candidate.
You have to merge all the filters into a single (or a reasonable number of) pass. Here it's easy because filters are very homogeneous. So I replace the filters stack by a collection holding the known primes.
(defn primes-gen
([candidates] (primes-gen candidates []))
([candidates known-primes]
(lazy-seq ; I prefer having the lazy-seq up here
(when-first [prime candidates] ; little known macro
(let [known-primes (conj known-primes prime)]
(cons prime
(primes-gen
(drop-while (fn [n] (some #(zero? (mod n %)) known-primes)) candidates)
known-primes)))))))

One of the possible solutions would be moving generator function inside lazy seq. For example (taken from here):
(def primes
(concat
[2 3 5 7]
(lazy-seq
(let [primes-from
(fn primes-from [n [f & r]]
(if (some #(zero? (rem n %))
(take-while #(<= (* % %) n) primes))
(recur (+ n f) r)
(lazy-seq (cons n (primes-from (+ n f) r)))))
wheel (cycle [2 4 2 4 6 2 6 4 2 4 6 6 2 6 4 2
6 4 6 8 4 2 4 2 4 8 6 4 6 2 4 6
2 6 6 4 2 4 6 2 6 4 2 4 2 10 2 10])]
(primes-from 11 wheel)))))

Related

Find elements in list and also keep adjacent element

i have a list like '(1 2 3 1 4 1 1 6 8 9 0 1) (not actually of numbers, just as an example)
I want to keep all "1" and the element next to the "1".
So the result i would want is (1 2 1 4 1 1 6 1).
Coming from an imperative point of view i would iterate over the list with a for loop, find the "1" at a certain index i and then also keep the element at index i+1.
What would a functional, Clojure idiomatic way of solving this problem be?
Using reduce you can move along the original list building a new list as you go. The reducing function f is passed the new list up until now and the next element from the old list. If the list up until now ends with a 1, or the next element is a 1, add the element to the new list. Otherwise keep the new list as is and move along.
user> (def xs [1 2 3 1 4 1 1 6 8 9 0 1])
#'user/xs
user> (defn f [x y] (if (or (= 1 y) (= 1 (peek x))) (conj x y) x))
#'user/f
user> (reduce f [] xs)
[1 2 1 4 1 1 6 1]
When you can't think of anything clever with sequence combinators, write the recursion by hand. It's not exactly elegant, but it's lazy:
(defn keep-pairs [pred coll]
(lazy-seq
(if (empty? coll)
[]
(let [x (first coll)
xs (next coll)]
(if (pred x)
(cons x (when xs
(let [y (first xs)]
(concat (when-not (pred y) [y])
(keep-pairs pred xs)))))
(when xs
(keep-pairs pred xs)))))))
user> (keep-pairs #{1} [1 2 3 1 4 1 1 6 8 9 0 1])
(1 2 1 4 1 1 6 1)
user> (take 10 (keep-pairs #{1} (cycle [1 2 3])))
(1 2 1 2 1 2 1 2 1 2)
I think I'd prefer reduce for something like this, but here's another 'functional' way of looking at it:
You have a sequence of values that should produce a potentially smaller sequence of values based on some predicate (i.e. filtering) and that predicate needs look-ahead/-behind behavior.
A less common use for map is mapping over multiple sequences at once e.g. (map f coll1 coll2 coll3). If you pass in an "offset" version of the same collection it can be used for the look-ahead/-behind logic.
(defn my-pairs [coll]
(mapcat
(fn [prev curr]
(when (or (= 1 prev) (= 1 curr))
[curr]))
(cons ::none coll) ;; these values are used for look-behind
coll))
This is (ab)using mapcat behavior to combine the mapping/filtering into one step, but it could also be phrased with map + filter.
here's one more solution with clojure's seq processors composition:
(defn process [pred data]
(->> data
(partition-by pred)
(partition-all 2 1)
(filter (comp pred ffirst))
(mapcat #(concat (first %) (take 1 (second %))))))
user> (process #{1} [1 2 1 1 3 4 1 5 1])
;;=> (1 2 1 1 3 1 5 1)
user> (process #{1} [0 1 2 1 1 1 3 4 1 5 1 6])
;;=> (1 2 1 1 1 3 1 5 1 6)
Another idea that does not work since it misses a last one:
(def v [1 2 3 1 4 1 1 6 8 9 0 1])
(mapcat (fn [a b] (when (= a 1) [a b])) v (rest v))
;; => (1 2 1 4 1 1 1 6 1)
So use two arity version of mapcat over the vector and the vector shifted one to the right.
You could check that last 1 explicitly and add, then you get a less elegant working version:
(concat
(mapcat (fn [a b] (when (= a 1) [a b])) v (rest v))
(when (= (peek v) 1) [1]))
;; => (1 2 1 4 1 1 1 6 1)
When you need to loop over data and retain state, I think a plain-old loop/recur is the most straightforward technique:
(ns tst.demo.core
(:use tupelo.core tupelo.test))
(defn keep-pairs
[data]
(loop [result []
prev nil
remaining data]
(if (empty? remaining)
result
(let [curr (first remaining)
keep-curr (or (= 1 curr)
(= 1 prev))
result-next (if keep-curr
(conj result curr)
result)
prev-next curr
remaining-next (rest remaining)]
(recur result-next prev-next remaining-next)))))
(dotest
(let [data [1 2 3 1 4 1 1 6 8 9 0 1]]
(is= [1 2 1 4 1 1 6 1]
(keep-pairs data))))
(defn windowed-pred [n pred]
(let [window (atom [])]
(fn [rf]
(fn ([] (rf))
([acc] (rf acc))
([acc v]
(let [keep? (or (pred v) (some pred #window))]
(swap! window #(vec (take-last n (conj %1 %2))) v)
(if keep?
(rf acc v)
acc)))))))
(let [c [1 2 3 1 4 1 1 6 8 9 0 1]
pred #(= % 1)]
(eduction (windowed-pred 1 pred) c))
(defn last-or-first? [obj pair] (or (= obj (last pair)) (= obj (first pair))))
; to test, whether previous element or element is object
(defn back-shift [l] (cons nil (butlast l))) ;; back-shifts a list
(defn keep-with-follower
[obj l]
(map #'last ; take only the element itself without its previous element
(filter #(last-or-first? obj %) ; is element or previous element the object?
(map #'list (back-shift l) l)))) ; group previous element and element in list
(def l '(1 2 3 1 4 1 1 6 8 9 0 1))
(keep-with-follower 1 l)
;; => (1 2 1 4 1 1 6 1)
A functional solution using only cons first last butlast list map filter = and defn and def.

How to find numbers that occur more than once in a list using clojure

I have a list of numbers 2 4 3 7 4 9 8 5 12 24 8.
I need to find numbers which are repeated more than once in clojure.
I have used frequencies function to find. But the result is
{2 1,4 2,3 1,7 1,9 1,8 2,5 1,12 1,24 1}
I intially thought of considering them as key value and then take each key value once and see if val is > 1. if value is greater than 1 then I need to inc 1.
But I am unable to work this out.
Can anyone please help me??
Is there anyway I can make this into [[2 1][4 2][3 1][7 1][9 1][8 2][5 1][12 1][24 1]] and consider each vector recursively or any better idea you can think of.
Thank you.
The function below will just continue on where you have stuck:
(defn find-duplicates [numbers]
(->> numbers
(frequencies)
(filter (fn [[k v]] (> v 1)))
(keys)))
It will filter map entries that have values greater than 1 and then extract their keys.
(find-duplicates [2 4 3 7 4 9 8 5 12 24 8])
;; => (4 8)
If you want the repeated items:
(defn repeated [coll]
(->> coll
frequencies
(remove #(= 1 (val %)))
keys))
(repeated [2 4 3 7 4 9 8 5 12 24 8])
;(4 8)
If you just want to count them:
(defn repeat-count [coll]
(->> coll
frequencies
(remove #(= 1 (val %)))
count))
(repeat-count [2 4 3 7 4 9 8 5 12 24 8])
;2
You can do it lazily, so that it will work on an endless sequence:
(defn repeated [coll]
((fn ff [seen xs]
(lazy-seq
(when-let [[y & ys] (seq xs)]
(case (seen y)
::several (ff seen ys)
::once (cons y (ff (assoc seen y ::several) ys))
(ff (assoc seen y ::once) ys)))))
{} coll))
(repeated [2 4 3 7 4 9 8 5 12 24 8])
;(4 8)
This is similar to core distinct.
... and finally, for brevity, ...
(defn repeated [coll]
(for [[k v] (frequencies coll) :when (not= v 1)] k))
I stole the use of keys from Piotrek Byzdyl's answer. It is only supposed to apply to a map. but works perfectly well here on a sequence of map-entries.
(->> s (group-by identity) (filter (comp next val)) keys)
You are on the right track.
If you seq over hash-map, e. g. via map, you get the kv tuple structure you described and can destructure an individual tuple in the element transformation function:
(->> s
(frequencies)
(map (fn [[number times]]
(cond-> number ; take number
(> times 1) (inc))))) ; inc if (times > 1), otherwise return number
You can use this approach.
(def c [2 4 3 7 4 9 8 5 12 24 8])
(->> c
sort
(partition-by identity)
(filter #(> (count %) 1))
(map first))

Lazy Pascal's Triangle in Clojure

I'm trying to write a succinct, lazy Pascal's Triangle in Clojure, rotated such that the rows/columns follow the diagonals of the triangle. That is, I want to produce the following lazy-seq of lazy-seqs:
((1 1 1 1 ...)
(1 2 3 4 ...)
(1 3 6 10 ...)
...
)
The code I have written is:
(def pascal
(cons (repeat 1)
(lazy-seq
(map #(map + %1 %2)
(map #(cons 0 %) (rest pascal)))
pascal
)))
so that each row is formed by adding a right-shifted version of itself to the previous row. The problem is that it never gets past the first line, since at that point (map #(cons 0 %) (rest pascal))) is empty.
=> (take 5 (map #(take 5 %) pascal))
((1 1 1 1 1))
What's a sensible way to go about solving this? I'm fairly new to programming in Clojure, and the very different way of thinking about a problem that it involves, so I'd really appreciate suggestions from anybody more experienced with this.
Succinct and lazy
(def pascal (iterate (partial reductions +') (repeat 1)))
(map (partial take 5) (take 5 pascal))
;=> ((1 1 1 1 1)
; (1 2 3 4 5)
; (1 3 6 10 15)
; (1 4 10 20 35)
; (1 5 15 35 70))
But too lazy?
(take 5 (nth pascal 10000))
;=> StackOverflowError
Try again
(take 5 (nth pascal 10000))
;=> (0)
Uh-oh, start over, and try, try again
(def pascal (iterate (partial reductions +') (repeat 1)))
(count (flatten (map (partial take 5) (take 100000 pascal))))
;=> 500000
Now these are all in your heap
(take 5 (nth pascal 100000))
;=> (1 100001 5000150001 166676666850001 4167083347916875001)
pascal should not be a var but a function that generates infinite seqs.
Check out this question for usage on lazy-seq
BTW, try this:
(defn gennext [s sum]
(let [newsum (+ (first s) sum)]
(cons newsum
(lazy-seq (gennext (rest s) newsum)))))
(defn pascal [s]
(cons s
(lazy-seq (pascal (gennext s 0)))))
(pascal (repeat 1)) gives you integer overflow exception but that does mean it produces the infinite seqs. You can use +' to use big integer.

Overhand Shuffle with Clojure - almost

I'm trying to implement a Overhand Shuffle in Clojure as a bit of a learning exercise
So I've got this code...
(defn overhand [cards]
(let [ card_count (count cards)
_new_cards '()
_rand_ceiling (if (> card_count 4) (int (* 0.2 card_count)) 1)]
(take card_count
(reduce into (mapcat
(fn [c]
(-> (inc (rand-int _rand_ceiling))
(take cards)
(cons _new_cards)))
cards)))))
It is very close to doing what I want, but it is repeatedly taking the first (random) N number of cards off the front, but I want it to progress through the list...
calling as
(overhand [1 2 3 4 5 6 7 8 9])
instead of ending up with
(1 2 3 1 2 1 2 3 4)
I want to end up with
(7 8 9 5 6 1 2 3 4)
Also, as a side note this feels like a really ugly way to indent/organize this function, is there a more obvious way?
this function is creating a list of lists, transforming each of them, and cating them back together. the problem it that it is pulling from the same thing every time and appending to a fixed value. essentially it is running the same operation every time and so it is repeating the output over with out progressing thgough the list. If you break the problem down differently and split the creation of random sized chunks from the stringing them together it gets a bit easier to see how to make it work correctly.
some ways to split the sequence:
(defn random-partitions [cards]
(let [card_count (count cards)
rand_ceiling (if (> card_count 4) (inc (int (* 0.2 card_count))) 1)]
(partition-by (ƒ [_](= 0 (rand-int rand_ceiling))) cards)))
to keep the partitions less than length four
(defn random-partitions [cards]
(let [[h t] (split-at (inc (rand-int 4)) cards)]
(when (not-empty h) (lazy-seq (cons h (random-partition t))))))
or to keep the partitions at the sizes in your original question
(defn random-partitions [cards]
(let [card_count (count cards)
rand_ceiling (if (> card_count 4) (inc (int (* 0.2 card_count))) 1)
[h t] (split-at (inc (rand-int rand_ceiling)) cards)]
(when (not-empty h) (lazy-seq (cons h (random-partition t))))))
(random-partitions [1 2 3 4 5 6 7 8 9 10])
((1 2 3 4) (5) (6 7 8 9) (10))
this can also be written without directly using lazy-seq:
(defn random-partitions [cards]
(->> [[] cards]
(iterate
(ƒ [[h t]]
(split-at (inc (rand-int 4)) t)))
rest ;iterate returns its input as the first argument, drop it.
(map first)
(take-while not-empty)))
which can then be reduced back into a single sequence:
(reduce into (random-partitions [1 2 3 4 5 6 7 8 9 10]))
(10 9 8 7 6 5 4 3 1 2)
if you reverse the arguments to into it looks like a much better shuffle
(reduce #(into %2 %1) (random-partitions [1 2 3 4 5 6 7 8 9 10]))
(8 7 1 2 3 4 5 6 9 10)
Answering your indentation question, you could refactor your function. For instance, pull out the lambda expression from mapcat, defn it, then use its name in the call to mapcat. You'll not only help with the indentation, but your mapcat will be clearer.
For instance, here's your original program, refactored. Please note that issues with your program have not been corrected, I'm just showing an example of refactoring to improve the layout:
(defn overhand [cards]
(let [ card_count (count cards)
_new_cards '()
_rand_ceiling (if (> card_count 4) (int (* 0.2 card_count)) 1)]
(defn f [c]
(-> (inc (rand-int _rand_ceiling))
(take cards)
(cons _new_cards)))
(take card_count (reduce into (mapcat f cards)))))
You can apply these principles to your fixed code.
A great deal of indentation issues can be resolved by simply factoring out complex expressions. It also helps readability in general.
A better way to organise the function is to separate the shuffling action from the random selection of splitting points that drive it. Then we can test the shuffler with predictable splitters.
The shuffling action can be expressed as
(defn shuffle [deck splitter]
(if (empty? deck)
()
(let [[taken left] (split-at (splitter (count deck)) deck)]
(concat (shuffle left splitter) taken))))
where
deck is the sequence to be shuffled
splitter is a function that chooses where to split deck, given its
size.
We can test shuffle for some simple splitters:
=> (shuffle (range 10) (constantly 3))
(9 6 7 8 3 4 5 0 1 2)
=> (shuffle (range 10) (constantly 2))
(8 9 6 7 4 5 2 3 0 1)
=> (shuffle (range 10) (constantly 1))
(9 8 7 6 5 4 3 2 1 0)
It works.
Now let's look at the way you choose your splitting point. We can illustrate your choice of _rand_ceiling thus:
=> (map
(fn [card_count] (if (> card_count 4) (int (* 0.2 card_count)) 1))
(range 20))
(1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3)
This implies that you will take just one or two cards from any deck of less than ten. By the way, a simpler way to express the function is
(fn [card_count] (max (quot card_count 5) 1))
So we can express your splitter function as
(fn [card_count] (inc (rand-int (max (quot card_count 5) 1))))
So the shuffler we want is
(defn overhand [deck]
(let [splitter (fn [card_count] (inc (rand-int (max (quot card_count 5) 1))))]
(shuffle deck splitter)))

Clojure: finding sequential items from a sequence

In a Clojure program, I have a sequence of numbers:
(2 3 4 6 8 1)
I want to find the longest sub-sequence where the items are sequential:
(2 3 4)
I am assuming that it will involve (take-while ...) or (reduce ...).
Any ideas?
Clarification: I need the longest initial list of sequential items. Much easier, I'm sure. Thanks for the solutions to the more difficult problem I initially posed.
If you are only interested in the longest initial sequence, it's a 1-liner:
(defn longest-initial-sequence [[x :as s]]
(take-while identity (map #(#{%1} %2) s (iterate inc x))))
Taking into account the OP's comment on the question -- which completely changes the game! -- this can be written very simply:
(let [doubletons (partition 2 1 [1 2 3 5 6])
increment? (fn increment? [[x y]]
(== (inc x) y))]
(cons (ffirst doubletons)
(map second (take-while increment? doubletons))))
;; returns (1 2 3)
Note that this is actually lazy. I expect it not to hold onto the head of doubletons thanks to locals clearing. Another version:
(cons (first [1 2 3 5 6])
(map second (take-while increment? (partition 2 1 [1 2 3 5 6]))))
The original version of the question is more fun, though! :-) A super-simple solution to that could be built using the above, but of course that would be significantly less performant than using reduce. I'll see if I have anything substantially different from zmila's and dnolen's solutions -- and yet still reasonably performant -- to add to that part of this thread later. (Not very likely, I guess.)
Answer to original:
(defn conj-if-sequential
([] [])
([a] a)
([a b] (let [a (if (vector? a) a [a])]
(if (= (inc (last a)) b)
(conj a b)
a))))
(reduce conj-if-sequential [2 3 4 6 8 1])
A more generic solution for those interested:
(defn sequential-seqs
([] [])
([a] a)
([a b] (let [n (last (last a))]
(if (and n (= (inc n) b))
(update-in a [(dec (count a))] conj b)
(conj a [b])))))
(defn largest
([] nil)
([a] a)
([a b] (if (> (count b) (count a)) b a)))
(reduce largest (reduce sequential-seqs [] [2 3 4 6 8 1 4 5 6 7 8 9 13]))
I think this is much better.
(defn find-max-seq [lst]
(let [[f & r] lst,
longest-seq (fn [a b] (if (> (count a) (count b)) a b)),
[last-seq max-seq] (reduce
(fn [ [[prev-num & _ :as cur-seq] max-seq] cur-num ]
(if (== (inc prev-num) cur-num)
[(conj cur-seq cur-num) max-seq]
[(list cur-num) (longest-seq cur-seq max-seq)]
))
[(list f) ()]
r)]
(reverse (longest-seq last-seq max-seq))))
(find-max-seq '(2 3 4 6 8 1)) ; ==> (2 3 4)
(find-max-seq '(3 2 3 4 6 8 9 10 11)) ; ==> (8 9 10 11)