Custom map function behaves oddly in lazy scenarios - clojure

For kicks, I decided to write my own version of map, and in the process finally learn how to use lazy-seq correctly so that the map is lazy:
;; Hand-written single-collection map wrapped in lazy-seq (the version the question is about).
;; The [head & tail] destructuring is part of the parameter vector, so it runs when
;; my-map is called, before the lazy-seq body is evaluated.
(defn my-map [f [head & tail]]
(lazy-seq
;; When head is falsy, tail is returned instead of a new cons cell.
(if head
(cons (f head) (my-map f tail))
tail)))
It works, but when I tested its lazy behavior against map, I noticed something different. I'm using a helper-map function that prints when an element is processed:
(defn print-map
  "Applies mapping-f to coll using a wrapper around f that first prints
  description-str, a colon and the element, then returns (f element)."
  [description-str f coll mapping-f]
  (letfn [(traced [x]
            (print (str description-str ":" x))
            (f x))]
    (mapping-f traced coll)))
When I use the standard map function, the elements are processed one at a time, alternating between functions:
(defn -main
  "Pipes the numbers 1..10 through two traced identity mappings and prints
  the fully realised result. Rebind m to try a different mapping function."
  []
  (let [m map
        numbers (into '() (range 10 0 -1))
        through-a (print-map "A" identity numbers m)
        through-b (print-map "B" identity through-a m)]
    (-> through-b doall println)))
Prints:
A:1 B:1 A:2 B:2 A:3 B:3 A:4 B:4 A:5 B:5 A:6 B:6 A:7 B:7 A:8 B:8 A:9 B:9 A:10 B:10 (1 2 3 4 5 6 7 8 9 10)
Note how each number is processed by both functions first before the rest of the elements are seen by either function.
But when I change m in -main to my-map, the processing order changes slightly:
A:1 A:2 B:1 A:3 B:2 A:4 B:3 A:5 B:4 A:6 B:5 A:7 B:6 A:8 B:7 A:9 B:8 A:10 B:9 B:10 (1 2 3 4 5 6 7 8 9 10)
Now the first function is run twice to start, the second function runs twice in a row in the end, and as a consequence, the mappings are no longer "in sync".
What's wrong with my-map that causes this to happen?

The destructuring you do in my-map will call next on your lazy sequence.
You can avoid that by not destructuring the tail:
(defn my-map
  "Lazy map over a single collection.

  f  - function applied to each element.
  xs - any seqable collection (may itself be lazy or infinite).

  Returns a lazy sequence of (f x) for every x in xs. Nothing in xs is
  realised until the result is consumed, and nil/false elements are mapped
  like any other value."
  [f xs]
  #_(next xs) ;; uncomment to observe similar "broken" behaviour
  (lazy-seq
    ;; Test for the end of input with seq rather than the truthiness of the
    ;; first element, so nil and false elements do not cut the result short.
    (when-let [s (seq xs)]
      ;; rest (not next) keeps the tail unrealised until it is needed.
      (cons (f (first s)) (my-map f (rest s))))))
;; You can find out what destructuring does with this call:
(destructure '[[x & r :as xs] numbers])
And next is not as lazy as rest.

Related

Clojure function to Replace Count

I need help with an assignment that uses Clojure. It is very small but the language is a bit confusing to understand. I need to create a function that behaves like count without actually using the count function. I know a loop can be involved with it somehow, but I am at a loss because nothing I have tried even gets my code to work. I expect it to output the number of elements in a list. For example:
(defn functionname []
...
...)
(println(functionname '(1 4 8)))
Output:3
Here is what I have so far:
;; Asker's work-in-progress attempt at a hand-written count, shown as posted.
(defn functionname [n]
;; NOTE(review): def creates or overwrites namespace-level vars named n and x;
;; it does not rebind the parameter n.
(def n 0)
(def x 0)
;; The loop body never changes x.
(while (< x n)
do
()
)
)
(println(functionname '(1 4 8)))
It's not much but I think it goes something like this.
This implementation takes the first element of the list and runs a sum until it can't anymore and then returns the sum.
(defn recount
  "Counts the elements of list-to-count without calling count.

  list-to-count - any seqable collection (nil counts as empty).

  Returns the number of elements as an integer. nil and false elements are
  counted like any other value."
  [list-to-count]
  (loop [xs list-to-count
         sum 0]
    ;; seq is nil only when no elements remain; testing (first xs) would
    ;; stop early at the first nil or false element.
    (if (seq xs)
      (recur (rest xs) (inc sum))
      sum)))
user=> (recount '(3 4 5 9))
4
A couple more example implementations:
(defn not-count
  "Counts coll by replacing every element with 1 (via constantly) and
  summing the ones with reduce."
  [coll]
  (->> coll
       (map (constantly 1))
       (reduce +)))
or:
(defn not-count
  "Counts coll by folding over it and bumping a running tally once per
  element; the element itself is ignored."
  [coll]
  (letfn [(bump [tally _element] (inc tally))]
    (reduce bump 0 coll)))
or:
(defn not-count
  "Counts coll by producing one 1 per element and adding them all up."
  [coll]
  (apply + (for [_ coll] 1)))
result:
(not-count '(5 7 8 1))
=> 4
I personally like the first one with reduce and constantly.

Why the program runs endlessly?

Why does the program run endlessly?
;; Asker's original version (the one that never terminates), shown as posted.
(defn lazycombine
;; One-argument arity starts with an empty accumulator vector.
([s] (lazycombine s []))
;; a is bound to (take 1 s), which is a sequence, and is then compared with the
;; keywords :start and :end below.
;; No branch checks whether s is empty; every branch makes another recursive call.
([s v] (let [a (take 1 s)
b (drop 1 s)]
(if (= a :start)
(lazy-seq (lazycombine b v))
(if (= a :end)
(lazy-seq (cons v (lazycombine b [])))
(lazy-seq (lazycombine b (conj v a))))))))
(def w '(:start 1 2 3 :end :start 7 7 :end))
(lazycombine w)
I need a function that returns a lazy sequence of elements by taking elements from another sequence of the form [:start 1 2 :end :start 5 :end] and combining all the elements between :start and :end into a vector.
You need to handle the termination condition - i.e. what should return when input s is empty?
And also the detection of :start and :end should use first instead of (take 1 s). And you can simplify that with destructuring.
(defn lazycombine
  "Lazily groups the items of s that sit between :start and :end markers.

  s - seqable of items interleaved with :start / :end keywords.
  v - accumulator vector for the segment being built (callers normally use
      the one-argument arity, which starts with []).

  Returns a lazy sequence with one vector per :end marker encountered.
  Items collected after the last :end (an unterminated segment) are dropped
  rather than leaked into the result as bare elements."
  ([s] (lazycombine s []))
  ([s v]
   ;; Everything, including the look at the head of s, happens inside
   ;; lazy-seq so that calling the function realises nothing.
   (lazy-seq
     ;; Returning nil at end of input terminates the sequence cleanly.
     (when-let [s (seq s)]
       (let [a (first s)
             b (rest s)]
         (if (= a :start)
           (lazycombine b v)
           (if (= a :end)
             (cons v (lazycombine b []))
             (lazycombine b (conj v a)))))))))
(def w '(:start 1 2 3 :end :start 7 7 :end))
(lazycombine w)
;; => ([1 2 3] [7 7])
To reduce cyclomatic complexity a bit, you can use condp to replace a couple of the ifs:
(defn lazycombine
  "Lazily groups the items of s that sit between :start and :end markers,
  dispatching on each item with condp.

  s - seqable of items interleaved with :start / :end keywords.
  v - accumulator vector for the segment being built (callers normally use
      the one-argument arity, which starts with []).

  Returns a lazy sequence with one vector per :end marker encountered.
  Items collected after the last :end (an unterminated segment) are dropped
  rather than leaked into the result as bare elements."
  ([s] (lazycombine s []))
  ([s v]
   (lazy-seq
     ;; nil at end of input ends the sequence; the head of s is only
     ;; inspected once the result is actually consumed.
     (when-let [s (seq s)]
       (let [a (first s)
             b (rest s)]
         (condp = a
           :start (lazycombine b v)
           :end (cons v (lazycombine b []))
           (lazycombine b (conj v a))))))))
I would do it like so, using take-while:
(ns tst.demo.core
(:use tupelo.core tupelo.test))
(def data
[:start 1 2 3 :end :start 7 7 :end])
(defn end-tag?
  "True when it is the :end marker."
  [it]
  (= :end it))
(defn start-tag?
  "True when it is the :start marker."
  [it]
  (= :start it))
(defn lazy-segments
  "Splits data into vectors of the items found before each end tag, with
  start tags removed. The first segment is built immediately; the remaining
  segments are produced lazily. Returns nil for empty data."
  [data]
  (when (seq data)
    (let [up-to-end (take-while (complement end-tag?) data)
          ;; skip the segment itself plus its closing end tag
          remainder (drop (inc (count up-to-end)) data)]
      (cons (vec (remove start-tag? up-to-end))
            (lazy-seq (lazy-segments remainder))))))
(dotest
(println "result: " (lazy-segments data)))
Running we get:
result: ([1 2 3] [7 7])
Note the contract when constructing a sequence recursively using cons (lazy or not). You must return either the next value in the sequence, or nil. Supplying nil to cons is the same as supplying an empty sequence:
(cons 5 nil) => (5)
(cons 5 []) => (5)
So it is convenient to use a when form to test the termination condition (instead of using if and returning an empty vector when the sequence must end).
Suppose we wrote the cons as a simple recursion:
(cons segment-result
(lazy-segments data-next))
This works great and produces the same result. The only thing the lazy-seq part does is to delay when the recursive call takes place. Because lazy-seq is a Clojure built-in (a macro in clojure.core), it is similar to loop/recur and does not consume the stack like ordinary recursion does. Thus, we can generate millions (or more) values in the lazy sequence without creating a StackOverflowError (on my computer, the default maximum stack size is ~4000). Consider the infinite lazy-sequence of integers beginning at 0:
(defn intrange
  "Infinite sequence n, n+1, n+2, ... The head is consed eagerly and the
  tail is deferred with lazy-seq."
  [n]
  (->> (inc n)
       intrange
       lazy-seq
       (cons n)))
(dotest
(time
(spyx (first (drop 1e6 (intrange 0))))))
Dropping the first million integers and taking the next one succeeds and requires only a few milliseconds:
(first (drop 1000000.0 (intrange 0))) => 1000000
"Elapsed time: 49.5 msecs"

Make (map f c1 c2) map (count c1) times, even if c2 has less elements

When doing
(map f [0 1 2] [:0 :1])
f will get called twice, with the arguments being
0 :0
1 :1
Is there a simple yet efficient way, i.e. without producing more intermediate sequences etc., to make f get called for every value of the first collection, with the following arguments?
0 :0
1 :1
2 nil
Edit: Addressing the question by @fl00r in the comments.
The actual use case that triggered this question needed map to always work exactly (count first-coll) times, regardless if the second (or third, or ...) collection was longer.
It's a bit late in the game now and somewhat unfair after having accepted an answer, but if a good answer gets added that only does what I specifically asked for - mapping (count first-coll) times - I would accept that.
You could do:
(map f [0 1 2] (concat [:0 :1] (repeat nil)))
Basically, pad the second coll with an infinite sequence of nils. map stops when it reaches the end of the first collection.
An (eager) loop/recur form that walks to end of longest:
;; Eagerly walk both collections until both are exhausted, collecting
;; (f a b) into a vector; an exhausted side contributes nil.
(loop [xs [0 1 2]
       ys [:0 :1]
       acc []]
  (if-not (or (seq xs) (seq ys))
    acc
    (recur (rest xs)
           (rest ys)
           (conj acc (f (first xs) (first ys))))))
Or you could write a lazy version of map that did something similar.
A general lazy version, as suggested by Alex Miller's answer, is
(defn map-all
  "Like map over several collections, but keeps going until every
  collection is exhausted; exhausted collections contribute nil. Lazy."
  [f & colls]
  (lazy-seq
    (when (some seq colls)
      (let [heads (map first colls)
            tails (map rest colls)]
        (cons (apply f heads)
              (apply map-all f tails))))))
For example,
(map-all vector [0 1 2] [:0 :1])
;([0 :0] [1 :1] [2 nil])
You would probably want to specialise map-all for one and two collections.
just for fun
this could easily be done with common lisp's do macro. We could implement it in clojure and do this (and much more fun things) with it:
(defmacro cl-do
  "Minimal Common Lisp style `do` loop built on loop/recur.

  clauses            - sequence of loop variable specs, each either a bare
                       symbol or a list (sym init step). A missing init is
                       nil; a missing step re-binds the variable to itself.
  [end-check result] - when end-check is truthy the loop stops and result
                       is returned.
  body               - forms evaluated for side effects on every iteration
                       before the step expressions run.

  All step expressions see the variable values of the current iteration,
  because recur evaluates every argument before rebinding."
  [clauses [end-check result] & body]
  (let [clauses (map #(if (coll? %) % (list %)) clauses)
        bindings (mapcat (juxt first second) clauses)
        ;; default step is the variable itself, i.e. leave it unchanged
        nexts (map #(nth % 2 (first %)) clauses)]
    ;; ~@ (unquote-splicing) spreads the collected forms into the template.
    `(loop [~@bindings]
       (if ~end-check
         ~result
         (do
           ~@body
           (recur ~@nexts))))))
and then just use it for mapping (notice it can operate on more than 2 colls):
(defn map-all
  "Eager variant built on cl-do: applies f across the heads of all
  collections until every collection is empty, returning a vector.
  Exhausted collections contribute nil."
  [f & colls]
  (cl-do ((remaining colls (map rest remaining))
          (acc [] (conj acc (apply f (map first remaining)))))
         ((every? empty? remaining) acc)))
in repl:
user> (map-all vector [1 2 3] [:a :s] '[z x c v])
;;=> [[1 :a z] [2 :s x] [3 nil c] [nil nil v]]

Remove duplicate elements from two sequences

I am wondering how to remove duplicate elements from two sequences and combine two sequences. For instance,
user=>(remove-dup [1 4 7 10 16] [2 7 18 4])
(1 2 10 18 16)
My code is:
;; Asker's original attempt (the version that raises the IllegalArgumentException quoted in the question).
(defn remove-dup [l1 l2]
(let [list (concat l1 l2)]
(loop [l list res '()]
;; Looks only at the first entry of (frequencies l) on each pass.
;; NOTE(review): both branches recur; no branch returns res.
(if (>= (second (first (frequencies l))) 2)
(recur (rest l) res)
;; l holds the elements themselves, so (first (first l)) calls first on an element.
(recur (rest l) (conj res (first (first l))))))))
But when I run the code, I got the error message:
IllegalArgumentException Don't know how to create ISeq from: java.lang.Long clojure.lang.RT.seqFrom (RT.java:528)
How can I fix this code. Thanks!
Your error is here:
(first (first l))
Remember, l is the sequence of all the elements you haven't handled yet. For instance, in the first iteration of the loop, l might look like this:
(1 4 7 10 16 2 7 18 4)
You can see from this that (first l) would be 1, so (first (first l)) would be trying to treat a number as a sequence, which doesn't work.
If you replace (first (first l)) with just (first l), you'll get a NullPointerException because you don't have a base case: what should you do when l is empty? You might do something like this (where ,,, is a placeholder for your current if expression):
(if (empty? l)
res
,,,)
However, if we try to use the method now, we still don't get the right result:
(remove-dup [1 4 7 10 16] [2 7 18 4])
;=> (4 18 7 2 16 10 1)
Hrm.
I could try to fiddle with your code some more to get it to work, but there's a better way to solve this problem. Since you're trying to remove duplicates and you don't care about order, the functions in clojure.set are the right tool for the job here. I would write remove-dup like this:
(require '[clojure.set :as set])
(defn remove-dup
  "Returns a seq of the values that occur in exactly one of c1 and c2
  (union minus intersection), or nil when there are none. Order is
  unspecified."
  [c1 c2]
  (let [s1 (set c1)
        s2 (set c2)
        everything (set/union s1 s2)
        shared (set/intersection s1 s2)]
    (seq (set/difference everything shared))))
Example:
(remove-dup [1 4 7 10 16] [2 7 18 4])
;=> (1 2 16 10 18)
There are a number of fatal errors in your code:
The thing that breaks it, is (first (first l)), since l is the list of numbers, it throws an error when you try to take first item of number.
But there are more important ones:
First of all, even if your code were correct, it doesn't have any case to break the loop, so it would probably lead to an infinite loop (or an exception of some kind). Second is your total misunderstanding of how frequencies is used. You can't rely on the order of the frequencies results, since it returns an unordered map (not to mention it is being called in every loop iteration, which is really bad for performance).
That's how I would do something like this with a single pass over the collections in a loop:
(defn unique
  "Returns the set of values that occur exactly once across coll1 and
  coll2 combined, using a single pass over the concatenated items.

  coll1, coll2 - seqable collections; nil and false are valid elements.

  Returns a set (empty when every value is repeated)."
  [coll1 coll2]
  (let [items (concat coll1 coll2)]
    (loop [res #{}
           seen #{}
           [x & xs :as items] items]
      (cond
        ;; no items left to check: return the result
        (empty? items) res
        ;; x was seen before, so it is a duplicate: drop it from the result.
        ;; contains? is used rather than (seen x) because calling a set
        ;; returns the element itself, which is falsy for nil and false.
        (contains? seen x) (recur (disj res x) seen xs)
        ;; first occurrence: mark it as seen and add it to the result
        :else (recur (conj res x) (conj seen x) xs)))))
in repl:
user> (unique [1 4 7 10 16] [2 7 18 4])
#{1 2 16 10 18}
(defn remove-dupl
  "Returns the items that appear in only one of l1 and l2.

  l1, l2 - seqable collections; nil and false are valid elements.

  Returns a lazy sequence: first the items of l2 that are not in l1, then
  the items of l1 that are not in l2, each in their original order."
  [l1 l2]
  (let [rmdup (fn [exclude items]
                (let [excluded (set exclude)]
                  ;; contains? rather than using the set as a predicate:
                  ;; (a-set nil) and (a-set false) are falsy even when the
                  ;; value is a member.
                  (remove #(contains? excluded %) items)))]
    (concat (rmdup l1 l2) (rmdup l2 l1))))
Try this solution
(defn remove-dup
  "Returns a list of the values that occur exactly once in the
  concatenation of l1 and l2, based on a frequency table. Order follows
  the table's iteration order, reversed by consing onto the front."
  [l1 l2]
  (let [tally (frequencies (concat l1 l2))
        keep-single (fn [res [item n]]
                      (if (>= n 2)
                        res
                        (cons item res)))]
    (reduce keep-single '() (seq tally))))
The others have found your errors. I'd like to look at what you are trying to do.
Given that
the order is not important and
you are removing duplicate elements
this is the set operation exclusive or (XOR).
It is not included in clojure.set. We can either, as Sam Estep does, define it in terms of the operations we have, or write it more directly ourselves:
(defn exclusive-or
  "Symmetric difference of the sets sa and sb: the elements that belong to
  exactly one of them. Iterates over the smaller set and toggles each of
  its elements in the larger one."
  [sa sb]
  (let [[smaller larger] (if (<= (count sa) (count sb))
                           [sa sb]
                           [sb sa])
        toggle (fn [acc item]
                 (if (contains? larger item)
                   (disj acc item)
                   (conj acc item)))]
    (reduce toggle larger smaller)))
We can then define
(defn remove-dup
  "Returns the set of values that occur in exactly one of xs and ys, by
  converting both to sets and taking their exclusive-or."
  [xs ys]
  (exclusive-or (set xs) (set ys)))
For example,
(remove-dup [1 4 7 10 16] [2 7 18 4]) ;#{1 2 10 16 18}
Edited to correct error in exclusive-or.

How to define a general recurrence function in Clojure

I had an idea for a general function for recurrence relations in Clojure:
;; Asker's original definition (the one reported to overflow the stack on long sequences).
(defn recurrence [f inits]
;; answer lazily refers to a fresh (recurrence f inits) call.
(let [answer (lazy-seq (recurrence f inits))
;; Sliding windows of (count inits) consecutive elements, advancing one element at a time.
windows (partition (count inits) 1 answer)]
;; The result is the initial values followed by f applied to each window.
(concat inits (lazy-seq (map f windows)))))
Then, for example, we can define the Fibonacci sequence as
(def fibs (recurrence (partial apply +) [0 1N]))
This works well enough for small numbers:
(take 10 fibs)
;(0 1N 1N 2N 3N 5N 8N 13N 21N 34N)
But it blows the stack if asked to realise a long sequence:
(first (drop 10000 fibs))
;StackOverflowError ...
Is there any way to overcome this?
The issue here is that you are building up calls to concat with every iteration, and the concat calls build up a big pile of unevaluated thunks that blow up when you finally ask for a value. By using cons and only passing forward the needed count of values (and concat, but not a recursive stack blowing concat), we get a better behaved lazy sequence:
user>
(defn recurrence
  "Lazy sequence of values produced by repeatedly applying f to a sliding
  window that starts as seed. The seed values themselves are not emitted;
  the first element is (apply f seed)."
  [f seed]
  (let [next-value (apply f seed)
        ;; slide the window: drop the oldest value, append the newest
        window (-> (rest seed)
                   (concat [next-value]))]
    (lazy-seq
      (cons next-value (recurrence f window)))))
#'user/recurrence
user> (def fibs (recurrence +' [0 1]))
#'user/fibs
user> (take 10 fibs)
(1 2 3 5 8 13 21 34 55 89)
user> (first (drop 1000 fibs))
113796925398360272257523782552224175572745930353730513145086634176691092536145985470146129334641866902783673042322088625863396052888690096969577173696370562180400527049497109023054114771394568040040412172632376N
Starting from the accepted answer.
We want to start the sequence with the seed.
As the author suggests, we use a queue for efficiency. There's no need for a deque: clojure's PersistentQueue is all we need.
The adapted recurrence might look like this:
(defn recurrence
  "Lazy sequence that starts with the seed values and continues with f
  applied to a sliding window of the most recent values. The window is
  held in a PersistentQueue."
  [f seed]
  (letfn [(unroll [window]
            (lazy-seq
              (let [advanced (conj (pop window) (apply f window))]
                ;; emit the oldest value, then continue with the slid window
                (cons (peek window) (unroll advanced)))))]
    (unroll (into clojure.lang.PersistentQueue/EMPTY seed))))
... and, as before ...
(def fibs (recurrence +' [0 1]))
Then
(take 12 fibs)
;(0 1 1 2 3 5 8 13 21 34 55 89)
and
(first (drop 10002 fibs))
;8808313798999706460535587299885792344569133301537603093281248581588866430778901138523864706157269456675588800865886247675809437523498150970221559510601560181294087848746589053969639563136029240012372549066798798094719576191973308422126326279213555251196166318874408326274301539390322803518252992290076920762408887989395155493858416681223312768552896888243582790311074362087005610402229049496332107340686586060657979236240386682641164227066121143559034009014945841981081725112002571350191895935065489568280471875231921589211922290722327984985122716638795413954666264406465380446634541610254330671268825137879350656411297062036767213134455919902771781340494043100975414363741764535940115524565864608829657809754769914128445181978270378287866823744102625502347527900388000745055086800240953306809812749509566731312036914233151914018501771921450184764574103073935102534293251428062545308577519199623634379243221570085077356825798892026553964792217231590220990107983019594905850594350801304445050382616788099309454050357226618996469497326357637590860697778839573019622727462974572287283362230047276931227360334662429269087569743826426571231312363764449136787553884744201313053214734561309933319540084556046608517637517504548504678781513322534938899633401432931830486565681512920858668651583588081131606578875919564654770363145404009043595587960412318600748184211764057415836799684562701209957100876177699107547099138630198810475391579823174144701223643426159466698539784175834833703091462361710174643192270852282486815561281142601677596876212142928258258208887179546346779692731745236863355234681940542335973869698025270754594426604276423657738172180374944253805390019625028405440634723860657509387766932350145251241217988369855220403886506917986777357970570384117865061881835736616564952954789880119861754143289344365095203398392354259295207086404424973833808977816398668306956673650512646688630422725310503423171676153535044117872421084183085552758688282209324654581312062411329039159389776521932093117
9697869997243770533719319530526369830529543842405655495229382251039116426750156771132964376N
Another way, based on an idea stolen - I think - from Joy of Clojure, is ...
(defn recurrence
  "Lazy sequence that starts with the seed values and continues with f
  applied to a sliding window of the most recent values. Built by
  iterating the window (a PersistentQueue) and peeking at each state."
  [f seed]
  (let [slide (fn [window]
                (conj (pop window) (apply f window)))]
    (->> (into clojure.lang.PersistentQueue/EMPTY seed)
         (iterate slide)
         (map peek))))