Clojure: find repetition - clojure

Let's say we have a list of integers: 1, 2, 5, 13, 6, 5, 7 and I want to find the first number that repeats and return a vector of the two indices. In my sample, it's 5 at [2, 5]. What I did so far is loop, but can I do it more elegant, short way?
(defn get-cycle
[xs]
(loop [[x & xs_rest] xs, indices {}, i 0]
(if (nil? x)
[0 i] ; Sequence is over before we found a duplicate.
(if-let [x_index (indices x)]
[x_index i]
(recur xs_rest (assoc indices x i) (inc i))))))
No need to return number itself, because I can get it by index and, second, it may be not always there.

An option using list processing, but not significantly more concise:
(defn get-cycle [xs]
(first (filter #(number? (first %))
(reductions
(fn [[m i] x] (if-let [xat (m x)] [xat i] [(assoc m x i) (inc i)]))
[(hash-map) 0] xs))))

Here is a version using reduced to stop consuming the sequence when you find the first duplicate:
(defn first-duplicate [coll]
(reduce (fn [acc [idx x]]
(if-let [v (get acc x)]
(reduced (conj v idx))
(assoc acc x [idx])))
{} (map-indexed #(vector % %2) coll)))

I know that you have only asked for the first. Here is a fully lazy implementation with little per-step allocation overhead
(defn dups
[coll]
(letfn [(loop-fn [idx [elem & rest] cached]
(if elem
(if-let [last-idx (cached elem)]
(cons [last-idx idx]
(lazy-seq (loop-fn (inc idx) rest (dissoc cached elem))))
(lazy-seq (loop-fn (inc idx) rest (assoc cached elem idx))))))]
(loop-fn 0 coll {})))
(first (dups v))
=> [2 5]
Edit: Here are some criterium benchmarks:
The answer that got accepted: 7.819269 µs
This answer (first (dups [12 5 13 6 5 7])): 6.176290 µs
Beschastnys: 5.841101 µs
first-duplicate: 5.025445 µs

Actually, loop is a pretty good choice unless you want to find all duplicates. Things like reduce will cause the full scan of an input sequence even when it's not necessary.
Here is my version of get-cycle:
(defn get-cycle [coll]
(loop [i 0 seen {} coll coll]
(when-let [[x & xs] (seq coll)]
(if-let [j (seen x)]
[j i]
(recur (inc i) (assoc seen x i) xs)))))
The only difference from your get-cycle is that my version returns nil when there is no duplicates.

The intent of your code seems different from your description in the comments so I'm not totally confident I understand. That said, loop/recur is definitely a valid way to approach the problem.
Here's what I came up with:
(defn get-cycle [xs]
(loop [xs xs index 0]
(when-let [[x & more] (seq xs)]
(when-let [[y] (seq more)]
(if (= x y)
{x [index (inc index)]}
(recur more (inc index)))))))
This will return a map of the repeated item to a vector of the two indices the item was found at.
(get-cycle [1 1 2 1 2 4 2 1 4 5 6 7])
;=> {1 [0 1]}
(get-cycle [1 2 1 2 4 2 1 4 5 6 7 7])
;=> {7 [10 11]}
(get-cycle [1 2 1 2 4 2 1 4 5 6 7 8])
;=> nil
Here's an alternative solution using sequence functions. I like this way better but whether it's shorter or more elegant is probably subjective.
(defn pairwise [coll]
(map vector coll (rest coll)))
(defn find-first [pred xs]
(first (filter pred xs)))
(defn get-cycle [xs]
(find-first #(apply = (val (first %)))
(map-indexed hash-map (pairwise xs))))
Edited based on clarification from #demi
Ah, got it. Is this what you have in mind?
(defn get-cycle [xs]
(loop [xs (map-indexed vector xs)]
(when-let [[[i n] & more] (seq xs)]
(if-let [[j _] (find-first #(= n (second %)) more)]
{n [i j]}
(recur more)))))
I re-used find-first from my earlier sequence-based solution.

Related

idiomatic selection sort in clojure

I was trying to implement selection sort O(n2) in clojure. Yes the underlying sort uses the very efficient java's array sort. However this is a learning exercise.
The code below works, however I was wondering if there is a more idiomatic way of rewriting it, as the below seems clunky -
(defn mins [coll]
(reduce (fn[[min-coll rest-coll] val]
(case (compare val (first min-coll))
-1 [[val] (apply conj rest-coll min-coll)]
0 [(conj min-coll val) rest-coll]
1 [min-coll (conj rest-coll val)]))
[[(first coll)] []]
(rest coll)))
;; (mins [3 1 1 2]) => [[1 1] [3 2]]
(defn selection-sort [coll]
(loop [[sorted coll] [[] coll]]
(let [[s c] (mins coll)]
(if-not (seq coll)
sorted
(recur [(concat sorted s) c])))))
(selection-sort [3 1 1 2 5 7 8 8 4 6])
A functional solution could be:
(defn selection-sort
[input]
(let [ixs (vec (range (count input)))
min-key-from (fn [acc ix] (apply min-key acc (subvec ixs ix)))
swap (fn [coll i j] (assoc coll i (coll j) j (coll i)))]
(reduce
(fn [acc ix] (swap acc ix (min-key-from acc ix))) input ixs)))
You can use the following:
(defn remove-first [coll e]
(if-let [pos (and (seq coll) (.indexOf coll e))]
(vec (concat
(subvec coll 0 pos)
(subvec coll (inc pos))))
coll))
(defn best [coll f]
(reduce f
(first coll)
(rest coll)))
(defn select-sort
([coll] (select-sort coll min))
([coll fmin]
(loop [sorted (transient []) c (vec coll)]
(if (seq c)
(let [n (best c fmin)]
(recur (conj! sorted n) (remove-first c n)))
(persistent! sorted)))))
=> (select-sort [3 5 2])
=> [2 3 5]
=> (select-sort [3 5 2] max)
=> [5 3 2]

insert-sort with reduce clojure

I have function
(defn goneSeq [inseq uptil]
(loop [counter 0 newSeq [] orginSeq inseq]
(if (== counter uptil)
newSeq
(recur (inc counter) (conj newSeq (first orginSeq)) (rest orginSeq)))))
(defn insert [sorted-seq n]
(loop [currentSeq sorted-seq counter 0]
(cond (empty? currentSeq) (concat sorted-seq (vector n))
(<= n (first currentSeq)) (concat (goneSeq sorted-seq counter) (vector n) currentSeq)
:else (recur (rest currentSeq) (inc counter)))))
that takes in a sorted-sequence and insert the number n at its appropriate position for example: (insert [1 3 4] 2) returns [1 2 3 4].
Now I want to use this function with reduce to sort a given sequence so something like:
(reduce (insert seq n) givenSeq)
What is thr correct way to achieve this?
If the function works for inserting a single value, then this would work:
(reduce insert [] givenSeq)
for example:
user> (reduce insert [] [0 1 2 30.5 0.88 2.2])
(0 0.88 1 2 2.2 30.5)
Also, it should be noted that sort and sort-by are built in and are better than most hand-rolled solutions.
May I suggest some simpler ways to do insert:
A slowish lazy way is
(defn insert [s x]
(let [[fore aft] (split-with #(> x %) s)]
(concat fore (cons x aft))))
A faster eager way is
(defn insert [coll x]
(loop [fore [], coll coll]
(if (and (seq coll) (> x (first coll)))
(recur (conj fore x) (rest coll))
(concat fore (cons x coll)))))
By the way, you had better put your defns in bottom-up order, if possible. Use declare if there is mutual recursion. You had me thinking your solution did not compile.

Is there any better and more idiomatic way of taking "while not enough" from a seq?

I need to take some amount of elements from a sequence based on some quantity rule. Here is a solution I came up with:
(defn take-while-not-enough
[p len xs]
(loop [ac 0
r []
s xs]
(if (empty? s)
r
(let [new-ac (p ac (first s))]
(if (>= new-ac len)
r
(recur new-ac (conj r (first s)) (rest s)))))))
(take-while-not-enough + 10 [2 5 7 8 2 1]) ; [2 5]
(take-while-not-enough #(+ %1 (%2 1)) 7 [[2 5] [7 8] [2 1]]) ; [[2 5]]
Is there any better way to achieve the same?
Thanks.
UPDATE:
Somebody posted that solution, but then removed it. It does the same is the answer that I accepted, but is more readable. Thank you, anonymous well-wisher!
(defn take-while-not-enough [reducer-fn limit data]
(->> (reductions reducer-fn 0 data) ; 1. the sequence of accumulated values
(map vector data) ; 2. paired with the original sequence
(take-while #(< (second %) limit)) ; 3. until a certain accumulated value
(map first))) ; 4. then extract the original values
My first thought is to view this problem as a variation on reduce and thus to break the problem into two steps:
count the number of items in the result
take that many from the input
I also took some liberties with the argument names:
user> (defn take-while-not-enough [reducer-fn limit data]
(take (dec (count (take-while #(< % limit) (reductions reducer-fn 0 data))))
data))
#'user/take-while-not-enough
user> (take-while-not-enough #(+ %1 (%2 1)) 7 [[2 5] [7 8] [2 1]])
([2 5])
user> (take-while-not-enough + 10 [2 5 7 8 2 1])
(2 5)
This returns a sequence and your examples return a vector, if this is important then you can add a call to vec
Something that would traverse the input sequence only once:
(defn take-while-not-enough [r v data]
(->> (rest (reductions (fn [s i] [(r (s 0) i) i]) [0 []] data))
(take-while (comp #(< % v) first))
(map second)))
Well, if you want to use flatland/useful, this is a kinda-okay way to use glue:
(defn take-while-not-enough [p len xs]
(first (glue conj []
(constantly true)
#(>= (reduce p 0 %) len)
xs)))
But it's rebuilding the accumulator for the entire "processed so far" chunk every time it decides whether to grow the chunk more, so it's O(n^2), which will be unacceptable for larger inputs.
The most obvious improvement to your implementation is to make it lazy instead of tail-recursive:
(defn take-while-not-enough [p len xs]
((fn step [acc coll]
(lazy-seq
(when-let [xs (seq coll)]
(let [x (first xs)
acc (p acc x)]
(when-not (>= acc len)
(cons x (step acc xs)))))))
0 xs))
Sometimes lazy-seq is straight-forward and self-explaining.
(defn take-while-not-enough
([f limit coll] (take-while-not-enough f limit (f) coll))
([f limit acc coll]
(lazy-seq
(when-let [s (seq coll)]
(let [fst (first s)
nacc (f acc fst)]
(when (< nxt-sd limit)
(cons fst (take-while-not-enough f limit nacc (rest s)))))))))
Note: f is expected to follow the rules of reduce.

Partition by a seq of integers

What would be a more idiomatic way to partition a seq based on a seq of integers instead of just one integer?
Here's my implementation:
(defn partition-by-seq
"Return a lazy sequence of lists with a variable number of items each
determined by the n in ncoll. Extra values in coll are dropped."
[ncoll coll]
(let [partition-coll (mapcat #(repeat % %) ncoll)]
(->> coll
(map vector partition-coll)
(partition-by first)
(map (partial map last)))))
Then (partition-by-seq [2 3 6] (range)) yields ((0 1) (2 3 4) (5 6 7 8 9 10)).
Your implementation looks fine, but there could be a more simple solution which uses simple recursion wrapped in lazy-seq(and turns out to be more efficient) than using map and existing partition-by as in your case.
(defn partition-by-seq [ncoll coll]
(if (empty? ncoll)
'()
(let [n (first ncoll)]
(cons (take n coll)
(lazy-seq (partition-by-seq (rest ncoll) (drop n coll)))))))
A variation on Ankur's answer, with a minor addition of laziness and when-let instead of an explicit test for empty?.
(defn partition-by-seq [parts coll]
(lazy-seq
(when-let [s (seq parts)]
(cons
(take (first s) coll)
(partition-by-seq (rest s) (nthrest coll (first s)))))))
(first (reduce (fn [[r l] n]
[(conj r (take n l)) (drop n l)])
[[] (range)]
[2 3 6]))
=> [(0 1) (2 3 4) (5 6 7 8 9 10)]

How do I find the index of an item in a vector?

Any ideas what ???? should be? Is there a built in?
What would be the best way to accomplish this task?
(def v ["one" "two" "three" "two"])
(defn find-thing [ thing vectr ]
(????))
(find-thing "two" v) ; ? maybe 1, maybe '(1,3), actually probably a lazy-seq
Built-in:
user> (def v ["one" "two" "three" "two"])
#'user/v
user> (.indexOf v "two")
1
user> (.indexOf v "foo")
-1
If you want a lazy seq of the indices for all matches:
user> (map-indexed vector v)
([0 "one"] [1 "two"] [2 "three"] [3 "two"])
user> (filter #(= "two" (second %)) *1)
([1 "two"] [3 "two"])
user> (map first *1)
(1 3)
user> (map first
(filter #(= (second %) "two")
(map-indexed vector v)))
(1 3)
Stuart Halloway has given a really nice answer in this post http://www.mail-archive.com/clojure#googlegroups.com/msg34159.html.
(use '[clojure.contrib.seq :only (positions)])
(def v ["one" "two" "three" "two"])
(positions #{"two"} v) ; -> (1 3)
If you wish to grab the first value just use first on the result.
(first (positions #{"two"} v)) ; -> 1
EDIT: Because clojure.contrib.seq has vanished I updated my answer with an example of a simple implementation:
(defn positions
[pred coll]
(keep-indexed (fn [idx x]
(when (pred x)
idx))
coll))
(defn find-thing [needle haystack]
(keep-indexed #(when (= %2 needle) %1) haystack))
But I'd like to warn you against fiddling with indices: most often than not it's going to produce less idiomatic, awkward Clojure.
As of Clojure 1.4 clojure.contrib.seq (and thus the positions function) is not available as it's missing a maintainer:
http://dev.clojure.org/display/design/Where+Did+Clojure.Contrib+Go
The source for clojure.contrib.seq/positions and it's dependency clojure.contrib.seq/indexed is:
(defn indexed
"Returns a lazy sequence of [index, item] pairs, where items come
from 's' and indexes count up from zero.
(indexed '(a b c d)) => ([0 a] [1 b] [2 c] [3 d])"
[s]
(map vector (iterate inc 0) s))
(defn positions
"Returns a lazy sequence containing the positions at which pred
is true for items in coll."
[pred coll]
(for [[idx elt] (indexed coll) :when (pred elt)] idx))
(positions #{2} [1 2 3 4 1 2 3 4]) => (1 5)
Available here: http://clojuredocs.org/clojure_contrib/clojure.contrib.seq/positions
I was attempting to answer my own question, but Brian beat me to it with a better answer!
(defn indices-of [f coll]
(keep-indexed #(if (f %2) %1 nil) coll))
(defn first-index-of [f coll]
(first (indices-of f coll)))
(defn find-thing [value coll]
(first-index-of #(= % value) coll))
(find-thing "two" ["one" "two" "three" "two"]) ; 1
(find-thing "two" '("one" "two" "three")) ; 1
;; these answers are a bit silly
(find-thing "two" #{"one" "two" "three"}) ; 1
(find-thing "two" {"one" "two" "two" "three"}) ; nil
Here's my contribution, using a looping structure and returning nil on failure.
I try to avoid loops when I can, but it seems fitting for this problem.
(defn index-of [xs x]
(loop [a (first xs)
r (rest xs)
i 0]
(cond
(= a x) i
(empty? r) nil
:else (recur (first r) (rest r) (inc i)))))
I recently had to find indexes several times or rather I chose to since it was easier than figuring out another way of approaching the problem. Along the way I discovered that my Clojure lists didn't have the .indexOf(Object object, int start) method. I dealt with the problem like so:
(defn index-of
"Returns the index of item. If start is given indexes prior to
start are skipped."
([coll item] (.indexOf coll item))
([coll item start]
(let [unadjusted-index (.indexOf (drop start coll) item)]
(if (= -1 unadjusted-index)
unadjusted-index
(+ unadjusted-index start)))))
We don't need to loop the whole collection if we need the first index. The some function will short circuit after the first match.
(defn index-of [x coll]
(let [idx? (fn [i a] (when (= x a) i))]
(first (keep-indexed idx? coll))))
I'd go with reduce-kv
(defn find-index [pred vec]
(reduce-kv
(fn [_ k v]
(if (pred v)
(reduced k)))
nil
vec))