a version of `sequence` that doesn't do chunking - clojure

I'd like to have a version of sequence that doesn't do the chunking of 32 elements. Currently, this code will output
(def peek #(doto % (print " ")))
(def pause #(do (Thread/sleep 10)
%))
(take 2 (->> (range 100)
(sequence (comp (map peek)
(map pause)
(map inc)))))
;; prints 0 1 2 3 4 <..etc..> 32
;; => (0, 1)
I'd like a version of it so that it only iterates through the elements that it needs
(take 2 (->> (range 100)
(iter-sequence (comp (map peek)
(map pause)
(map inc)))))
;; prints 0 1
;; => (0, 1)
Is there a way to do this?

I had to change a couple of things to get it working. The first is to cut and paste sequence code and replace clojure.lang.RT/chunkIteratorSeq with an alternative version of clojure.lang.IteratorSeq that has exposed public constructor methods.
The reason being is that the clojure.lang.IteratorSeq/create has a check to iter.next() on L27 which will block if the source is blocking.
(defn seqiter
{:added "1.0"
:static true}
([coll] coll)
([xform coll]
(IteratorSeq.
(TransformerIterator/create xform (clojure.lang.RT/iter coll))))
([xform coll & colls]
(IteratorSeq.
(TransformerIterator/createMulti
xform
(map #(clojure.lang.RT/iter %) (cons coll colls))))))

Related

Return an else value when using recur

I am new to Clojure, and doing my best to forget all my previous experience with more procedural languages (java, ruby, swift) and embrace Clojure for what it is. I am actually really enjoying the way it makes me think differently -- however, I have come up against a pattern that I just can't seem to figure out. The easiest way to illustrate, is with some code:
(defn char-to-int [c] (Integer/valueOf (str c)))
(defn digits-dont-decrease? [str]
(let [digits (map char-to-int (seq str)) i 0]
(when (< i 5)
(if (> (nth digits i) (nth digits (+ i 1)))
false
(recur (inc i))))))
(def result (digits-dont-decrease? "112233"))
(if (= true result)
(println "fit rules")
(println "doesn't fit rules"))
The input is a 6 digit number as a string, and I am simply attempting to make sure that each digit from left to right is >= the previous digit. I want to return false if it doesn't, and true if it does. The false situation works great -- however, given that recur needs to be the last thing in the function (as far as I can tell), how do I return true. As it is, when the condition is satisfied, I get an illegal argument exception:
Execution error (IllegalArgumentException) at clojure.exercise.two/digits-dont-decrease? (four:20).
Don't know how to create ISeq from: java.lang.Long
How should I be thinking about this? I assume my past training is getting in my mental way.
This is not answering your question, but also shows an alternative. While the (apply < ...) approach over the whole string is very elegant for small strings (it is eager), you can use every? for an short-circuiting approach. E.g.:
user=> (defn nr-seq [s] (map #(Integer/parseInt (str %)) s))
#'user/nr-seq
user=> (every? (partial apply <=) (partition 2 1 (nr-seq "123")))
true
You need nothing but
(apply <= "112233")
Reason: string is a sequence of character and comparison operator works on character.
(->> "0123456789" (mapcat #(repeat 1000 %)) (apply str) (def loooong))
(count loooong)
10000
(time (apply <= loooong))
"Elapsed time: 21.006625 msecs"
true
(->> "9123456789" (mapcat #(repeat 1000 %)) (apply str) (def bad-loooong))
(count bad-loooong)
10000
(time (apply <= bad-loooong))
"Elapsed time: 2.581750 msecs"
false
(above runs on my iPhone)
In this case, you don't really need loop/recur. Just use the built-in nature of <= like so:
(ns tst.demo.core
(:use demo.core tupelo.core tupelo.test))
(def true-samples
["123"
"112233"
"13"])
(def false-samples
["10"
"12324"])
(defn char->int
[char-or-str]
(let [str-val (str char-or-str)] ; coerce any chars to len-1 strings
(assert (= 1 (count str-val)))
(Integer/parseInt str-val)))
(dotest
(is= 5 (char->int "5"))
(is= 5 (char->int \5))
(is= [1 2 3] (mapv char->int "123"))
; this shows what we are going for
(is (<= 1 1 2 2 3 3))
(isnt (<= 1 1 2 1 3 3))
and now test the char sequences:
;-----------------------------------------------------------------------------
; using built-in `<=` function
(doseq [true-samp true-samples]
(let [digit-vals (mapv char->int true-samp)]
(is (apply <= digit-vals))))
(doseq [false-samp false-samples]
(let [digit-vals (mapv char->int false-samp)]
(isnt (apply <= digit-vals))))
if you want to write your own, you can like so:
(defn increasing-equal-seq?
"Returns true iff sequence is non-decreasing"
[coll]
(when (< (count coll) 2)
(throw (ex-info "coll must have at least 2 vals" {:coll coll})))
(loop [prev (first coll)
remaining (rest coll)]
(if (empty? remaining)
true
(let [curr (first remaining)
prev-next curr
remaining-next (rest remaining)]
(if (<= prev curr)
(recur prev-next remaining-next)
false)))))
;-----------------------------------------------------------------------------
; using home-grown loop/recur
(doseq [true-samp true-samples]
(let [digit-vals (mapv char->int true-samp)]
(is (increasing-equal-seq? digit-vals))))
(doseq [false-samp false-samples]
(let [digit-vals (mapv char->int false-samp)]
(isnt (increasing-equal-seq? digit-vals))))
)
with result
-------------------------------
Clojure 1.10.1 Java 13
-------------------------------
Testing tst.demo.core
Ran 2 tests containing 15 assertions.
0 failures, 0 errors.
Passed all tests
Finished at 23:36:17.096 (run time: 0.028s)
You an use loop with recur.
Assuming you require following input v/s output -
"543221" => false
"54321" => false
"12345" => true
"123345" => true
Following function can help
;; Assuming char-to-int is defined by you before as per the question
(defn digits-dont-decrease?
[strng]
(let [digits (map char-to-int (seq strng))]
(loop [;;the bindings in loop act as initial state
decreases true
i (- (count digits) 2)]
(let [decreases (and decreases (>= (nth digits (+ i 1)) (nth digits i)))]
(if (or (< i 1) (not decreases))
decreases
(recur decreases (dec i)))))))
This should work for numeric string of any length.
Hope this helps. Please let me know if you were looking for something else :).
(defn non-decreasing? [str]
(every?
identity
(map
(fn [a b]
(<= (int a) (int b)))
(seq str)
(rest str))))
(defn non-decreasing-loop? [str]
(loop [a (seq str) b (rest str)]
(if-not (seq b)
true
(if (<= (int (first a)) (int (first b)))
(recur (rest a) (rest b))
false))))
(non-decreasing? "112334589")
(non-decreasing? "112324589")
(non-decreasing-loop? "112334589")
(non-decreasing-loop? "112324589")

Implementing Clojure conditional/branching transducer

I'm trying to make a conditional transducer in Clojure as follows:
(defn if-xf
"Takes a predicate and two transducers.
Returns a new transducer that routes the input to one of the transducers
depending on the result of the predicate."
[pred a b]
(fn [rf]
(let [arf (a rf)
brf (b rf)]
(fn
([] (rf))
([result]
(rf result))
([result input]
(if (pred input)
(arf result input)
(brf result input)))))))
It is pretty useful in that it lets you do stuff like this:
;; multiply odd numbers by 100, square the evens.
(= [0 100 4 300 16 500 36 700 64 900]
(sequence
(if-xf odd? (map #(* % 100)) (map (fn [x] (* x x))))
(range 10)))
However, this conditional transducer does not work very well with transducers that perform cleanup in their 1-arity branch:
;; negs are multiplied by 100, non-negs are partitioned by 2
;; BUT! where did 6 go?
;; expected: [-600 -500 -400 -300 -200 -100 [0 1] [2 3] [4 5] [6]]
;;
(= [-600 -500 -400 -300 -200 -100 [0 1] [2 3] [4 5]]
(sequence
(if-xf neg? (map #(* % 100)) (partition-all 2))
(range -6 7)))
Is it possible to tweak the definition of if-xf to handle the case of transducers with cleanup?
I'm trying this, but with weird behavior:
(defn if-xf
"Takes a predicate and two transducers.
Returns a new transducer that routes the input to one of the transducers
depending on the result of the predicate."
[pred a b]
(fn [rf]
(let [arf (a rf)
brf (b rf)]
(fn
([] (rf))
([result]
(arf result) ;; new!
(brf result) ;; new!
(rf result))
([result input]
(if (pred input)
(arf result input)
(brf result input)))))))
Specifically, the flushing happens at the end:
;; the [0] at the end should appear just before the 100.
(= [[-6 -5] [-4 -3] [-2 -1] 100 200 300 400 500 600 [0]]
(sequence
(if-xf pos? (map #(* % 100)) (partition-all 2))
(range -6 7)))
Is there a way to make this branching/conditional transducer without storing the entire input sequence in local state within this transducer (i.e. doing all the processing in the 1-arity branch upon cleanup)?
The idea is to complete every time the transducer switches over. IMO this is the only way to do it without buffering:
(defn if-xf
"Takes a predicate and two transducers.
Returns a new transducer that routes the input to one of the transducers
depending on the result of the predicate."
[pred a b]
(fn [rf]
(let [arf (volatile! (a rf))
brf (volatile! (b rf))
a? (volatile! nil)]
(fn
([] (rf))
([result]
(let [crf (if #a? #arf #brf)]
(-> result crf rf)))
([result input]
(let [p? (pred input)
[xrf crf] (if p? [#arf #brf] [#brf #arf])
switched? (some-> #a? (not= p?))]
(if switched?
(-> result crf (xrf input))
(xrf result input))
(vreset! a? p?)))))))
(sequence (if-xf pos? (map #(* % 100)) (partition-all 2)) [0 1 0 1 0 0 0 1])
; => ([0] 100 [0] 100 [0 0] [0] 100)
I think your question is ill-defined. What exactly do you want to happen when the transducers have state? For example, what do you expect this do:
(sequence
(if-xf even? (partition-all 3) (partition-all 2))
(range 14))
Furthermore, sometimes reducing functions have work to do at the beginning and the end and can't be restarted arbitrarily. For example, here is a reducer that computes the mean:
(defn mean
([] {:count 0, :sum 0})
([result] (double (/ (:sum result) (:count result))))
([result x]
(update-in
(update-in result [:count] inc)
[:sum] (partial + x))))
(transduce identity mean [10 20 40 40]) ;27.5
Now let's take the average, where anything below 20 counts for 20, but everything else is decreased by 1:
(transduce
(if-xf
(fn [x] (< x 20))
(map (constantly 20))
(map dec))
mean [10 20 40 40]) ;29.25
My answer is the following: I think your original solution is best. It works well using map, which is how you stated the usefulness of the conditional transducer in the first place.

Efficient side-effect-only analogue of Clojure's map function

What if map and doseq had a baby? I'm trying to write a function or macro like Common Lisp's mapc, but in Clojure. This does essentially what map does, but only for side-effects, so it doesn't need to generate a sequence of results, and wouldn't be lazy. I know that one can iterate over a single sequence using doseq, but map can iterate over multiple sequences, applying a function to each element in turn of all of the sequences. I also know that one can wrap map in dorun. (Note: This question has been extensively edited after many comments and a very thorough answer. The original question focused on macros, but those macro issues turned out to be peripheral.)
This is fast (according to criterium):
(defn domap2
[f coll]
(dotimes [i (count coll)]
(f (nth coll i))))
but it only accepts one collection. This accepts arbitrary collections:
(defn domap3
[f & colls]
(dotimes [i (apply min (map count colls))]
(apply f (map #(nth % i) colls))))
but it's very slow by comparison. I could also write a version like the first, but with different parameter cases [f c1 c2], [f c1 c2 c3], etc., but in the end, I'll need a case that handles arbitrary numbers of collections, like the last example, which is simpler anyway. I've tried many other solutions as well.
Since the second example is very much like the first except for the use of apply and the map inside the loop, I suspect that getting rid of them would speed things up a lot. I have tried to do this by writing domap2 as a macro, but the way that the catch-all variable after & is handled keeps tripping me up, as illustrated above.
Other examples (out of 15 or 20 different versions), benchmark code, and times on a Macbook Pro that's a few years old (full source here):
(defn domap1
[f coll]
(doseq [e coll]
(f e)))
(defn domap7
[f coll]
(dorun (map f coll)))
(defn domap18
[f & colls]
(dorun (apply map f colls)))
(defn domap15
[f coll]
(when (seq coll)
(f (first coll))
(recur f (rest coll))))
(defn domap17
[f & colls]
(let [argvecs (apply (partial map vector) colls)] ; seq of ntuples of interleaved vals
(doseq [args argvecs]
(apply f args))))
I'm working on an application that uses core.matrix matrices and vectors, but feel free to substitute your own side-effecting functions below.
(ns tst
(:use criterium.core
[clojure.core.matrix :as mx]))
(def howmany 1000)
(def a-coll (vec (range howmany)))
(def maskvec (zero-vector :vectorz howmany))
(defn unmaskit!
[idx]
(mx/mset! maskvec idx 1.0)) ; sets element idx of maskvec to 1.0
(defn runbench
[domapfn label]
(print (str "\n" label ":\n"))
(bench (def _ (domapfn unmaskit! a-coll))))
Mean execution times according to Criterium, in microseconds:
domap1: 12.317551 [doseq]
domap2: 19.065317 [dotimes]
domap3: 265.983779 [dotimes with apply, map]
domap7: 53.263230 [map with dorun]
domap18: 54.456801 [map with dorun, multiple collections]
domap15: 32.034993 [recur]
domap17: 95.259984 [doseq, multiple collections interleaved using map]
EDIT: It may be that dorun+map is the best way to implement domap for multiple large lazy sequence arguments, but doseq is still king when it comes to single lazy sequences. Performing the same operation as unmask! above, but running the index through (mod idx 1000), and iterating over (range 100000000), doseq is about twice as fast as dorun+map in my tests (i.e. (def domap25 (comp dorun map))).
You don't need a macro, and I don't see why a macro would be helpful here.
user> (defn do-map [f & lists] (apply mapv f lists) nil)
#'user/do-map
user> (do-map (comp println +) (range 2 6) (range 8 11) (range 22 40))
32
35
38
nil
note do-map here is eager (thanks to mapv) and only executes for side effects
Macros can use varargs lists, as the (useless!) macro version of do-map demonstrates:
user> (defmacro do-map-macro [f & lists] `(do (mapv ~f ~#lists) nil))
#'user/do-map-macro
user> (do-map-macro (comp println +) (range 2 6) (range 8 11) (range 22 40))
32
35
38
nil
user> (macroexpand-1 '(do-map-macro (comp println +) (range 2 6) (range 8 11) (range 22 40)))
(do (clojure.core/mapv (comp println +) (range 2 6) (range 8 11) (range 22 40)) nil)
Addendum:
addressing the efficiency / garbage-creation concerns:
note that below I truncate the output of the criterium bench function, for conciseness reasons:
(defn do-map-loop
[f & lists]
(loop [heads lists]
(when (every? seq heads)
(apply f (map first heads))
(recur (map rest heads)))))
user> (crit/bench (with-out-str (do-map-loop (comp println +) (range 2 6) (range 8 11) (range 22 40))))
...
Execution time mean : 11.367804 µs
...
This looks promising because it doesn't create a data structure that we aren't using anyway (unlike mapv above). But it turns out it is slower than the previous (maybe because of the two map calls?).
user> (crit/bench (with-out-str (do-map-macro (comp println +) (range 2 6) (range 8 11) (range 22 40))))
...
Execution time mean : 7.427182 µs
...
user> (crit/bench (with-out-str (do-map (comp println +) (range 2 6) (range 8 11) (range 22 40))))
...
Execution time mean : 8.355587 µs
...
Since the loop still wasn't faster, let's try a version which specializes on arity, so that we don't need to call map twice on every iteration:
(defn do-map-loop-3
[f a b c]
(loop [[a & as] a
[b & bs] b
[c & cs] c]
(when (and a b c)
(f a b c)
(recur as bs cs))))
Remarkably, though this is faster, it is still slower than the version that just used mapv:
user> (crit/bench (with-out-str (do-map-loop-3 (comp println +) (range 2 6) (range 8 11) (range 22 40))))
...
Execution time mean : 9.450108 µs
...
Next I wondered if the size of the input was a factor. With larger inputs...
user> (def test-input (repeatedly 3 #(range (rand-int 100) (rand-int 1000))))
#'user/test-input
user> (map count test-input)
(475 531 511)
user> (crit/bench (with-out-str (apply do-map-loop-3 (comp println +) test-input)))
...
Execution time mean : 1.005073 ms
...
user> (crit/bench (with-out-str (apply do-map (comp println +) test-input)))
...
Execution time mean : 756.955238 µs
...
Finally, for completeness, the timing of do-map-loop (which as expected is slightly slower than do-map-loop-3)
user> (crit/bench (with-out-str (apply do-map-loop (comp println +) test-input)))
...
Execution time mean : 1.553932 ms
As we see, even with larger input sizes, mapv is faster.
(I should note for completeness here that map is slightly faster than mapv, but not by a large degree).

clojure - ordered pairwise combination of 2 lists

Being quite new to clojure I am still struggling with its functions. If I have 2 lists, say "1234" and "abcd" I need to make all possible ordered lists of length 4. Output I want to have is for length 4 is:
("1234" "123d" "12c4" "12cd" "1b34" "1b3d" "1bc4" "1bcd"
"a234" "a23d" "a2c4" "a2cd" "ab34" "ab3d" "abc4" "abcd")
which 2^n in number depending on the inputs.
I have written a the following function to generate by random walk a single string/list.
The argument [par] would be something like ["1234" "abcd"]
(defn make-string [par] (let [c1 (first par) c2 (second par)] ;version 3 0.63 msec
(apply str (for [loc (partition 2 (interleave c1 c2))
:let [ch (if (< (rand) 0.5) (first loc) (second loc))]]
ch))))
The output will be 1 of the 16 ordered lists above. Each of the two input lists will always have equal length, say 2,3,4,5, up to say 2^38 or within available ram. In the above function I have tried to modify it to generate all ordered lists but failed. Hopefully someone can help me. Thanks.
Mikera is right that you need to use recursion, but you can do this while being both more concise and more general - why work with two strings, when you can work with N sequences?
(defn choices [colls]
(if (every? seq colls)
(for [item (map first colls)
sub-choice (choices (map rest colls))]
(cons item sub-choice))
'(())))
(defn choose-strings [& strings]
(for [chars (choices strings)]
(apply str chars)))
user> (choose-strings "123" "abc")
("123" "12c" "1b3" "1bc" "a23" "a2c" "ab3" "abc")
This recursive nested-for is a very useful pattern for creating a sequence of paths through a "tree" of choices. Whether there's an actual tree, or the same choice repeated over and over, or (as here) a set of N choices that don't depend on the previous choices, this is a handy tool to have available.
You can also take advantage of the cartesian-product from the clojure.math.combinatorics package, although this requires some pre- and post-transformation of your data:
(ns your-namespace (:require clojure.math.combinatorics))
(defn str-combinations [s1 s2]
(->>
(map vector s1 s2) ; regroup into pairs of characters, indexwise
(apply clojure.math.combinatorics/cartesian-product) ; generate combinations
(map (partial apply str)))) ; glue seqs-of-chars back into strings
> (str-combinations "abc" "123")
("abc" "ab3" "a2c" "a23" "1bc" "1b3" "12c" "123")
>
The trick is to make the function recursive, calling itself on the remainder of the list at each step.
You can do something like:
(defn make-all-strings [string1 string2]
(if (empty? string1)
[""]
(let [char1 (first string1)
char2 (first string2)
following-strings (make-all-strings (next string1) (next string2))]
(concat
(map #(str char1 %) following-strings)
(map #(str char2 %) following-strings)))))
(make-all-strings "abc" "123")
=> ("abc" "ab3" "a2c" "a23" "1bc" "1b3" "12c" "123")
(defn combine-strings [a b]
(if (seq a)
(for [xs (combine-strings (rest a) (rest b))
x [(first a) (first b)]]
(str x xs))
[""]))
Now that I wrote it I realize it's a less generic version of amalloiy's one.
You could also use the binary digits of numbers between 0 and 16 to form your combinations:
if a bit is zero select from the first string otherwise the second.
E.g. 6 = 2r0110 => "1bc4", 13 = 2r1101 => "ab3d", etc.
(map (fn [n] (apply str (map #(%1 %2)
(map vector "1234" "abcd")
(map #(if (bit-test n %) 1 0) [3 2 1 0])))); binary digits
(range 0 16))
=> ("1234" "123d" "12c4" "12cd" "1b34" "1b3d" "1bc4" "1bcd" "a234" "a23d" "a2c4" "a2cd" "ab34" "ab3d" "abc4" "abcd")
The same approach can apply to generating combinations from more than 2 strings.
Say you have 3 strings ("1234" "abcd" "ABCD"), there will be 81 combinations (3^4). Using base-3 ternary digits:
(defn ternary-digits [n] (reverse (map #(mod % 3) (take 4 (iterate #(quot % 3) n))))
(map (fn [n] (apply str (map #(%1 %2)
(map vector "1234" "abcd" "ABCD")
(ternary-digits n)
(range 0 81))
(def c1 "1234")
(def c2 "abcd")
(defn make-string [c1 c2]
(map #(apply str %)
(apply map vector
(map (fn [col rep]
(take (math/expt 2 (count c1))
(cycle (apply concat
(map #(repeat rep %) col)))))
(map vector c1 c2)
(iterate #(* 2 %) 1)))))
(make-string c1 c2)
=> ("1234" "a234" "1b34" "ab34" "12c4" "a2c4" "1bc4" "abc4" "123d" "a23d" "1b3d" "ab3d" "12cd" "a2cd" "1bcd" "abcd")

make sequence side-effectfull in Clojure

What I want to do is like following.
(def mystream (stream (range 100)))
(take 3 mystream)
;=> (0 1 2)
(take 3 mystream)
;=> (3 4 5)
(first (drop 1 mystream))
;=> 7
The stream function make sequence side-effectfull like io stream.
I think this is almost impossible.
Here is my attempt.
(defprotocol Stream (first! [this]))
(defn stream [lst]
(let [alst (atom lst)]
(reify Stream
(first! [this]
(let [[fs] #alst]
(swap! alst rest)
fs)))))
(let [mystream (stream (iterate inc 1))]
(map #(if (string? %) (first! mystream) %)
[:a "e" "b" :c "i" :f]))
;=> (:a 1 2 :c 3 :f)
Unfotunately this approach need to implement all function I will use.
Judging by your followup comment to Maurits, you don't need mutation, but rather simply need to emit a new sequence with the elements in the right place.
For example:
(defn replace-when [pred coll replacements]
(lazy-seq
(when (seq coll)
(if (seq replacements)
(if (pred (first coll))
(cons (first replacements)
(replace-when pred (rest coll) (rest replacements)))
(cons (first coll)
(replace-when pred (rest coll) replacements)))
coll))))
user=> (def seq1 [:a :b :c])
#'user/seq1
user=> (def seq2 [:x "i" "u" :y :z "e"])
#'user/seq2
user=> (replace-when string? seq2 seq1)
(:x :a :b :y :z :c)
This won't work with the standard take and drop, but you could quite easily write your own to work on a mutable atom, e.g. you could do something like this:
(def mystream (atom (range 100)))
(defn my-take [n stream]
(let [data #stream
result (take n data)]
(reset! stream (drop n data))
result))
(my-take 3 mystream)
=> (0 1 2)
(my-take 3 mystream)
=> (3 4 5)