Remove n instances of matched elements from collection - clojure

What is the best way to remove n instances of matched elements of collection-2 from collection-1?
(let [coll-1 [8 2]
coll-2 [8 8 8 2]
Here's what I first came up with to solve original problem:
...
;; (remove (set coll-1) coll-2))
;; --> ()
But realised I must achieve:
...
;; (some-magic coll-1 coll-2))
;; --> (8 8)
Clarification:
(some-magic {8 2} [8 8 8 2]) ;;Removes 1x8 and 1x2 from vector.
(some-magic {8 8 2} [8 8 8 2]) ;;Removes 2x8 and 1x2 from vector.
Edit:
Preserving the order is desired.

Here is a lazy solution, written in the style of distinct:
(defn some-magic
  "Lazily returns the items of coll, in their original order, skipping the
  first n occurrences of each value v for every entry {v n} in count-map.
  Values that are not keys of count-map are never skipped."
  [count-map coll]
  (letfn [(step [items budget]
            (lazy-seq
              ;; The inner loop walks past removed items with recur, so a long
              ;; run of removals does not nest one lazy-seq per skipped item.
              (loop [[head :as pending] items
                     budget budget]
                (when-let [s (seq pending)]
                  (if (pos? (get budget head 0))
                    ;; Still owed a removal for this value: drop it and
                    ;; decrement the remaining count.
                    (recur (rest s) (update-in budget [head] dec))
                    (cons head (step (rest s) budget)))))))]
    (step coll count-map)))
The first argument needs to be a map indicating how many of each value to remove:
(some-magic {8 1, 2 1} [8 8 8 2]) ;; Removes 1x8 and 1x2
;=> (8 8)
(some-magic {8 2, 2 1} [8 8 8 2]) ;; Removes 2x8 and 1x2
;=> (8)
Here is an example dealing with falsey values and infinite input:
(take 10 (some-magic {3 4, 2 2, nil 1} (concat [3 nil 3 false nil 3 2] (range))))
;=> (false nil 0 1 4 5 6 7 8 9)

I don't see any of the built-in sequence manipulation functions quite solving this, though a straightforward loop can build the result nicely:
user> (loop [coll-1 (set coll-1) coll-2 coll-2 result []]
(if-let [[f & r] coll-2]
(if (coll-1 f)
(recur (disj coll-1 f) r result)
(recur coll-1 r (conj result f)))
result))
[8 8]

Related

split a sequence by delimiter in clojure?

Say I have a sequence in clojure like
'(1 2 3 6 7 8)
and I want to split it up so that the list splits whenever an element divisible by 3 is encountered, so that the result looks like
'((1 2) (3) (6 7 8))
(EDIT: What I actually need is
[[1 2] [3] [6 7 8]]
, but I'll take the sequence version too : )
What is the best way to do this in clojure?
partition-by is no help:
(partition-by #(= (rem % 3) 0) '(1 2 3 6 7 8))
; => ((1 2) (3 6) (7 8))
split-with is close:
(split-with #(not (= (rem % 3) 0)) '(1 2 3 6 7 8))
; => [(1 2) (3 6 7 8)]
Something like this?
(defn partition-with
  "Lazily splits coll into groups. Each group starts with one element and
  continues with the following elements for which f is false, so a new group
  begins at every element (after the first) that satisfies f."
  [f coll]
  (lazy-seq
    (when-let [[head & tail] (seq coll)]
      (let [group     (cons head (take-while (complement f) tail))
            ;; Skip exactly the elements consumed by this group.
            remaining (seq (drop (count group) coll))]
        (cons group (partition-with f remaining))))))
(partition-with #(= (rem % 3) 0) [1 2 3 6 7 8 9 12 13 15 16 17 18])
=> ((1 2) (3) (6 7 8) (9) (12 13) (15 16 17) (18))
This is an interesting problem. I recently added a function split-using to the Tupelo library, which seems like a good fit here. I left the spyx debug statements in the code below so you can see how things progress:
(ns tst.clj.core
(:use clojure.test tupelo.test)
(:require
[tupelo.core :as t] ))
(t/refer-tupelo)
(defn start-segment?
  "True when the first element of vals is divisible by 3."
  [vals]
  (-> vals first (rem 3) zero?))
;; Splits vals-in into a vector of segments. Each pass takes one element
;; unconditionally, then extends the segment with whatever split-using returns
;; before the next point where pred holds.
;; pred is called with the remaining collection, not with a single element.
(defn partition-using [pred vals-in]
(loop [vals vals-in
result []]
(if (empty? vals)
result
;; NOTE(review): t/spy-let appears to print each binding as it is made
;; (see the sample output below) -- confirm against the Tupelo docs.
(t/spy-let [
;; The current element always opens the segment, even if pred holds for it.
out-first (take 1 vals)
[out-rest unprocessed] (split-using pred (spyx (next vals)))
;; NOTE(review): glue presumably concatenates the two collections and
;; append presumably adds out-vals as one new item at the end of result,
;; judging by the sample output -- confirm against the Tupelo docs.
out-vals (glue out-first out-rest)
new-result (append result out-vals)]
(recur unprocessed new-result)))))
Which gives us output like:
out-first => (1)
(next vals) => (2 3 6 7 8)
[out-rest unprocessed] => [[2] (3 6 7 8)]
out-vals => [1 2]
new-result => [[1 2]]
out-first => (3)
(next vals) => (6 7 8)
[out-rest unprocessed] => [[] [6 7 8]]
out-vals => [3]
new-result => [[1 2] [3]]
out-first => (6)
(next vals) => (7 8)
[out-rest unprocessed] => [[7 8] ()]
out-vals => [6 7 8]
new-result => [[1 2] [3] [6 7 8]]
(partition-using start-segment? [1 2 3 6 7 8]) => [[1 2] [3] [6 7 8]]
or for a larger input vector:
(partition-using start-segment? [1 2 3 6 7 8 9 12 13 15 16 17 18 18 18 3 4 5])
=> [[1 2] [3] [6 7 8] [9] [12 13] [15 16 17] [18] [18] [18] [3 4 5]]
You could also create a solution using nested loop/recur, but that is already coded up in the split-using function:
(defn split-using
"Splits a collection based on a predicate with a collection argument.
Finds the first index N such that (pred (drop N coll)) is true. Returns a length-2 vector
of [ (take N coll) (drop N coll) ]. If pred is never satisfied, a pair of all the elements
(as a vector) and an empty collection is returned."
[pred coll]
;; left accumulates the elements passed over so far; right is what remains.
;; right starts as a vector and becomes the result of rest after the first step.
(loop [left []
right (vec coll)]
(if (or (empty? right) ; don't call pred if no more data
(pred right))
[left right]
;; NOTE(review): append is a Tupelo helper, presumably adding the element
;; to the end of left -- confirm against the Tupelo docs.
(recur (append left (first right))
(rest right)))))
Actually, the above function seems like it would be useful in the future. partition-using has now been added to the Tupelo library.
and one more old school reduce-based solution:
(defn split-all
  "Splits items into a vector of vectors, starting a new group at every item
  that satisfies pred. Returns nil when items is empty.

  An item that satisfies pred only closes the current group when that group
  is non-empty, so a matching first item does not produce a leading empty
  group: (split-all pred [3 4]) yields [[3 4]], not [[] [3 4]]."
  [pred items]
  (when (seq items)
    ;; The reduction state is [finished-groups current-group]; the final
    ;; (apply conj ...) appends the last open group to the finished ones.
    (apply conj (reduce (fn [[acc curr] x]
                          (if (and (pred x) (seq curr))
                            [(conj acc curr) [x]]
                            [acc (conj curr x)]))
                        [[] []]
                        items))))
#'user/split-all
user> (split-all #(zero? (rem % 3)) '(1 2 3 6 7 8 10 11 12))
;;=> [[1 2] [3] [6 7 8 10 11] [12]]

Clojure; select all nth element from list of lists with unequal size, for n = 1, 2,

I'd like to have a function, such that,
(f '([1 4 7] [2 5 9] [3 6]))
would give
([1 2 3] [4 5 6] [7 9])
I tried
(apply map vector '([1 4 7] [2 5 9] [3 6]))
would only produce:
([1 2 3] [4 5 6])
I find it so hard to describe my requirements that it's difficult for me to search for a ready-made solution.
Please help me either to improve my description, or pointer to a solution.
Thanks in advance!
I'd solve a more general problem, which means you might be able to reuse that function in the future. I'd change map so that it keeps going past the end of the shortest collection.
(defn map-all
  "Like map, but with several collections it keeps going until every
  collection is exhausted, calling f with one argument per collection that
  still has an element at that position."
  ([f] (map f))
  ([f coll] (map f coll))
  ([f c1 & colls]
   (letfn [(columns [sources]
             (lazy-seq
               ;; keep seq drops exhausted collections without looking at
               ;; element values, so nil and false elements survive.
               (when-let [live (seq (keep seq sources))]
                 (cons (map first live)
                       (columns (map rest live))))))]
     (map (partial apply f) (columns (cons c1 colls))))))
(apply map-all vector '([1 4 7] [2 5 9] [3 6]))
(apply map-all vector '([1 false 7] [nil 5 9] [3 6] [8]))
Note, that as opposed to many other solutions, this one works fine even if any of the sequences contain nil or false.
or this way with loop/recur:
user> (defn transpose-all-2 [colls]
;; Builds a vector of rows: row k holds the k-th element of every collection
;; that still has one; collections that have run out are skipped.
(loop [colls colls res []]
;; Drop exhausted collections; seq returns nil once none are left, ending the loop.
(if-let [colls (seq (filter seq colls))]
;; doall forces the mapped tails on each pass rather than leaving them lazy.
(recur (doall (map next colls))
(conj res (mapv first colls)))
res)))
#'user/transpose-all-2
user> (transpose-all-2 x)
[[1 2 3] [4 5 6] [7 9]]
user> (transpose-all-2 '((0 1 2 3) (4 5 6 7) (8 9)))
[[0 4 8] [1 5 9] [2 6] [3 7]]
If you know the maximum length of the vectors ahead of time, you could define
(defn tx
  "Returns an infinite lazy seq of vectors; the k-th vector holds the truthy
  k-th elements of the collections in colls. The seq never ends on its own,
  so callers must bound it (e.g. with take)."
  [colls]
  (lazy-seq
    (let [column (->> colls
                      (map first)
                      (filter identity)
                      vec)]
      (cons column (tx (map rest colls))))))
then
(take 3 (tx '([1 4 7] [2 5 9] [3 6])))
A simple solution is
(defn transpose-all
  "Lazily transposes a seq of collections of unequal length: the k-th result
  holds the k-th element of every collection that is long enough."
  [colls]
  (lazy-seq
    ;; Exhausted collections are dropped before taking heads, so the result
    ;; ends once every collection has run out.
    (when-let [live (seq (keep seq colls))]
      (cons (map first live)
            (transpose-all (map rest live))))))
For example,
(transpose-all '([1 4 7] [2 5 9] [3 6] [11 12 13 14]))
;((1 2 3 11) (4 5 6 12) (7 9 13) (14))
Here is my own attempt:
(defn f
  "Transposes a collection of collections of unequal length: the k-th result
  holds the k-th element of every inner collection that is long enough.

  Shorter collections are padded with a private sentinel object that is
  stripped again after transposing, so nil and false elements of the input
  are preserved. Returns an empty list when l is empty."
  [l]
  (if (empty? l)
    ;; (apply max ...) has no zero-argument form, so handle empty input here.
    ()
    (let [pad       (Object.)
          max-count (apply max (map count l))
          padded    (map (fn [e] (concat e (repeat (- max-count (count e)) pad))) l)]
      (map (fn [row] (remove #(identical? pad %) row))
           (apply map vector padded)))))
Another simple solution:
(->> jagged-list
(map #(concat % (repeat nil)))
(apply map vector)
(take-while (partial some identity)))
A jagged-list like this
'([1 4 7 ]
[2 5 9 ]
[3 6 ]
[11 12 13 14])
will produce:
'([1 2 3 11]
[4 5 6 12]
[7 9 nil 13]
[nil nil nil 14])
Here is another go that doesn't require you to know the vector length in advance:
(defn padzip
  "Transposes colls (a collection of collections of unequal length) into a
  vector of vectors; the k-th vector holds the k-th element of every
  collection that is long enough.

  Exhausted collections are skipped by checking for emptiness rather than by
  filtering out nil heads, so nil elements of the input are preserved."
  [& [colls]]
  (loop [acc   []
         colls colls]
    (if (every? empty? colls)
      acc
      (recur (conj acc (mapv first (remove empty? colls)))
             (map rest colls)))))

clojure: pop and push

I'm looking for a sequential data structure which is perfect for the following operation. The length of the list remains constant; it will never be longer or shorter than a fixed length.
Omit the first item and add x to the end.
(0 1 2 3 4 5 6 7 8 9)
(pop-and-push "10")
(1 2 3 4 5 6 7 8 9 10)
There is only one other reading-operation that has to be done equally often:
(last coll)
pop-and-push could be implemented like this:
;; Intended to drop one item from coll and add x at the end.
;; NOTE(review): the vector literal below holds the string "x", not the
;; parameter x, so the argument is ignored as written.
;; concat returns a lazy seq, which pop does not accept, so the result cannot
;; be fed back into this function.
(defn pop-and-push [coll x]
(concat (pop coll) ["x"]))
(unfortunately this does not work with sequences produced by e.g. range, it just pops when the sequence declared by the literals '(..) is passed.)
but is this optimal?
The main issue here (once we change "x" to x) is that concat returns a lazy-seq, and lazy-seqs are invalid args to pop.
user=> (defn pop-and-push [coll x] (concat (pop coll) [x]))
#'user/pop-and-push
user=> (pop-and-push [1 2 3] 4)
(1 2 4)
user=> (pop-and-push *1 5)
ClassCastException clojure.lang.LazySeq cannot be cast to clojure.lang.IPersistentStack clojure.lang.RT.pop (RT.java:730)
My naive preference would be to use a vector. This function is easy to implement with subvec.
user=> (defn pop-and-push [v x] (conj (subvec (vec v) 1) x))
#'user/pop-and-push
user=> (pop-and-push [1 2 3] 4)
[2 3 4]
user=> (pop-and-push *1 5)
[3 4 5]
as you can see, this version can actually operate on its own return value
As suggested in the comments, PersistentQueue is made for this situation:
user=> (defn pop-and-push [v x] (conj (pop v) x))
#'user/pop-and-push
user=> (pop-and-push (into clojure.lang.PersistentQueue/EMPTY [1 2 3]) 4)
#object[clojure.lang.PersistentQueue 0x50313382 "clojure.lang.PersistentQueue#7c42"]
user=> (into [] *1)
[2 3 4]
user=> (pop-and-push *2 5)
#object[clojure.lang.PersistentQueue 0x4bd31064 "clojure.lang.PersistentQueue#8023"]
user=> (into [] *1)
[3 4 5]
The PersistentQueue data structure, though less convenient to use in some ways, is actually optimized for this usage.

clojure: partition a seq based on a seq of values

I would like to partition a seq, based on a seq of values
(partition-by-seq [3 5] [1 2 3 4 5 6])
((1 2 3)(4 5)(6))
The first input is a seq of split points.
The second input is a seq i would like to partition.
So, that the first list will be partitioned at the value 3 (1 2 3) and the second partition will be (4 5) where 5 is the next split point.
another example:
(partition-by-seq [3] [2 3 4 5])
result: ((2 3)(4 5))
(partition-by-seq [2 5] [2 3 5 6])
result: ((2)(3 5)(6))
given: the first seq (split points) is always a subset of the second input seq.
I came up with this solution which is lazy and quite (IMO) straightforward.
(defn part-seq
  "Lazily partitions coll using the successive values in splitters as upper
  bounds. Each partition starts with the next element of coll and continues
  with the following elements that are <= the current split point. Once the
  splitters run out, the rest of coll is returned as a single final partition."
  [splitters coll]
  (lazy-seq
    (when-let [[head & tail :as items] (seq coll)]
      (if-let [limit (first splitters)]
        ;; Take elements up to and including the current split point.
        (let [chunk (cons head (take-while (fn [x] (<= x limit)) tail))]
          ;; Recurse on the remaining splitters and the unconsumed elements.
          (cons chunk
                (part-seq (rest splitters) (drop (count chunk) items))))
        ;; No splitter left: everything remaining forms one partition.
        (list coll)))))
If the split points are all in the sequence:
(part-seq [3 5 8] [0 1 2 3 4 5 6 7 8 9])
;;=> ((0 1 2 3) (4 5) (6 7 8) (9))
If some split points are not in the sequence
(part-seq [3 5 8] [0 1 2 4 5 6 8 9])
;;=> ((0 1 2) (4 5) (6 8) (9))
Example with some infinite sequences for the splitters and the sequence to split.
(take 5 (part-seq (iterate (partial + 3) 5) (range)))
;;=> ((0 1 2 3 4 5) (6 7 8) (9 10 11) (12 13 14) (15 16 17))
the sequence to be partitioned is a splittee and the elements of split-points (aka. splitter) marks the last element of a partition.
from your example:
splittee: [1 2 3 4 5 6]
splitter: [3 5]
result: ((1 2 3)(4 5)(6))
Because the resulting partitions are always increasing integer sequences, and an increasing integer sequence of x can be defined as start <= x < end, the splitter elements can be transformed into the end of a sequence according to this definition.
so, from [3 5], we want to find subsequences ended with 4 and 6.
then by adding the start, the splitter can be transformed into sequences of [start end]. The start and end of the splittee is also used.
so, the splitter [3 5] then becomes:
[[1 4] [4 6] [6 7]]
splitter transformation could be done like this
(->> (concat [(first splittee)]
(mapcat (juxt inc inc) splitter)
[(inc (last splittee))])
(partition 2)
there is a nice symmetry between transformed splitter and the desired result.
[[1 4] [4 6] [6 7]]
((1 2 3) (4 5) (6))
then the problem becomes how to extract subsequences inside splittee that is ranged by [start end] inside transformed splitter
clojure has subseq function that can be used to find a subsequence inside ordered sequence by start and end criteria. I can just map the subseq of splittee for each elements of transformed-splitter
(map (fn [[x y]]
(subseq (apply sorted-set splittee) <= x < y))
transformed-splitter)
by combining the steps above, my answer is:
(defn partition-by-seq
  "Partitions the integers in splittee so that each value in splitter is the
  last element of a partition.

  splitter is turned into [start end) ranges and each range is looked up in
  a sorted set built from splittee, so the result is sorted and free of
  duplicates. Returns an empty list when splittee is empty.

  The lower bound uses >= as subseq requires; with <= the first element of a
  range is skipped whenever the range's start value is not itself in splittee."
  [splitter splittee]
  (if (empty? splittee)
    ;; (last splittee) would be nil and inc would throw, so stop early.
    ()
    ;; Build the sorted set once instead of once per partition.
    (let [sorted (apply sorted-set splittee)]
      (->> (concat [(first splittee)]
                   (mapcat (juxt inc inc) splitter)
                   [(inc (last splittee))])
           (partition 2)
           (map (fn [[x y]]
                  (subseq sorted >= x < y)))))))
This is the solution i came up with.
(def a [1 2 3 4 5 6])
(def p [2 4 5])
(defn partition-by-seq
  "Partitions input using the values in s as inclusive upper bounds: for each
  split point in turn, the leading elements of the remaining input that are
  <= that point form one partition. Whatever is left after the last split
  point forms the final partition. Empty partitions are omitted.

  s     - seq of split points
  input - seq of numbers to partition"
  [s input]
  (loop [points    (seq s)
         remaining input
         parts     (transient [])]
    (if points
      (let [limit        (first points)
            [chunk more] (split-with #(<= % limit) remaining)]
        (recur (next points) more (conj! parts chunk)))
      ;; Use the value returned by conj!: a transient must not be relied on
      ;; to have been updated in place.
      (->> (persistent! (conj! parts remaining))
           ;; seq (rather than count) drops empty partitions without walking
           ;; each one in full.
           (filter seq)))))
(partition-by-seq p a)

Changing map behaviour in Clojure

I need to modify map function behavior to provide mapping not with minimum collection size but with maximum and use zero for missing elements.
Standard behavior:
(map + [1 2 3] [4 5 6 7 8]) => [5 7 9]
Needed behavior:
(map + [1 2 3] [4 5 6 7 8]) => [5 7 9 7 8]
I wrote function to do this, but it seems not very extensible with varargs.
(defn map-ext
  "Maps f over coll1 and coll2 in parallel, padding the shorter collection
  with zeros so the result is as long as the longer one."
  [f coll1 coll2]
  (let [n1      (count coll1)
        n2      (count coll2)
        longest (max n1 n2)
        ;; Extend a collection of known length n with zeros up to longest.
        pad     (fn [coll n] (concat coll (repeat (- longest n) 0)))]
    (map f (pad coll1 n1) (pad coll2 n2))))
Is there a better way to do this?
Your method is concise, but inefficient (it calls count). A more efficient solution, which does not require the entirety of its input sequences to be stored in memory follows:
(defn map-pad
  "Lazily maps f over colls in parallel until the longest one is exhausted,
  passing pad in place of the element of any collection that has run out.

  Exhaustion is decided by whether the collection still has a seq, not by
  the truthiness of its head, so nil and false elements are passed to f
  unchanged instead of being replaced by pad."
  [f pad & colls]
  (lazy-seq
    (let [seqs (map seq colls)]
      ;; seq returns nil for an exhausted collection, so continue while any
      ;; entry is non-nil.
      (when (some identity seqs)
        (cons (apply f (map #(if % (first %) pad) seqs))
              (apply map-pad f pad (map rest seqs)))))))
Used like this:
user=> (map-pad + 0 [] [1] [1 1] (range 1 10))
(3 3 3 4 5 6 7 8 9)
Edit: Generalized map-pad to arbitrary arity.
Another lazy variant, usable with an arbitrary number of input sequences:
(defn map-ext
  "Lazily maps f over seqs in parallel until the longest is exhausted, using
  ext in place of the element of any seq that has already run out."
  [f ext & seqs]
  (letfn [(head-or-ext [s]
            ;; Head of s, or ext when s has no elements left.
            (if-let [xs (seq s)]
              (first xs)
              ext))]
    (lazy-seq
      (if-not (some seq seqs)
        ()
        (cons (apply f (map head-or-ext seqs))
              (apply map-ext f ext (map rest seqs)))))))
Usage:
user> (map-ext + 0 [1 2 3] [4 5 6 7 8])
(5 7 9 7 8)
user> (map-ext + 0 [1 2 3] [4 5 6 7 8] [3 4])
(8 11 9 7 8)
If you just want it to work for any number of collections, try:
(defn map-ext
  "Maps f over colls in parallel after padding every collection with zeros
  to the length of the longest one. Not lazy in the inputs: each collection
  is counted up front."
  [f & colls]
  (let [longest        (apply max (map count colls))
        pad-to-longest (fn [coll]
                         (concat coll (repeat (- longest (count coll)) 0)))]
    (apply map f (map pad-to-longest colls))))
Clojure> (map-ext + [1 2] [1 2 3] [1 2 3 4])
(3 6 6 4)
I suspect there may be better solutions though (as Trevor Caira suggests, this solution isn't lazy due to the calls to count).
How about that:
(defn map-ext
  "Maps f over the given collections in parallel; when the shortest runs out,
  continues over whatever remains of the longer ones, calling f with fewer
  arguments."
  [f x & xs]
  (let [colls     (cons x xs)
        ;; Ordinary map stops at the shortest collection.
        merged    (apply map f colls)
        consumed  (count merged)
        ;; Tails of the collections that still have elements left.
        leftovers (->> colls
                       (map #(drop consumed %))
                       (filter not-empty))]
    (if (seq leftovers)
      (lazy-seq (concat merged (apply map-ext f leftovers)))
      merged)))
user> (map-ext + [1 2 3] [4] [5 6] [7 8 9 10])
(17 16 12 10)
Along the lines of @LeNsTR's solution, but simpler and faster:
(defn map-ext
  "Lazily maps f over colls in parallel until every collection is exhausted,
  calling f only with the elements of the collections that still have one."
  [f & colls]
  (lazy-seq
    ;; Exhausted collections are discarded before taking heads and tails.
    (when-let [live (seq (filter seq colls))]
      (cons (apply f (map first live))
            (apply map-ext f (map rest live))))))
(map-ext + [1 2 3] [4] [5 6] [7 8 9 10])
;(17 16 12 10)
I've just noticed Michał Marczyk's accepted solution, which is superior: it deals properly with asymmetric mapping functions such as -.
We can make Michał Marczyk's answer neater by using the convention - which many core functions follow - that you get a default or identity value by calling the function with no arguments. For examples:
(+) ;=> 0
(concat) ;=> ()
The code becomes
(defn map-ext
  "Lazily maps f over seqs in parallel until the longest is exhausted. For a
  seq that has already run out, the value of calling f with no arguments is
  used in place of its element."
  [f & seqs]
  (letfn [(head-or-default [s]
            ;; (f) is only called when s is actually exhausted, so f needs a
            ;; zero-argument form only if the seqs differ in length.
            (if-let [xs (seq s)]
              (first xs)
              (f)))]
    (lazy-seq
      (when (some seq seqs)
        (cons (apply f (map head-or-default seqs))
              (apply map-ext f (map rest seqs)))))))
(map-ext + [1 2 3] [4 5 6 7 8] [3 4])
;(8 11 9 7 8)
I've made the minimum changes. It could be sped up a bit.
We may need a function that will inject such a default value into a function that lacks it:
(defn with-default
  "Wraps f in a function that returns default when called with no arguments
  and otherwise applies f to the arguments it was given."
  [f default]
  (fn [& args]
    (if (seq args)
      (apply f args)
      default)))
((with-default + 6)) ;=> 6
((with-default + 6) 7 8) ;=> 15
This could be sped up or even turned into a macro.