How are transducers executed in core.async channels? - clojure

When making a channel a channel like so:
(chan 10 tx)
If i created 10 channels like this and then sent a message to all at the same time, how would the transducers be executed. Would they run concurrent or on one thread?

I think that right now the behaviour of when the transducer is run is not defined, but looking at the implementation of ManyToManyChannel, the transducer (which is the add! field) can be called both when writing and reading from the channel.
Running a simple test seems that if the channel is not full, the writing thread will execute the transducer, but if the channel is full, sometimes the reading thread runs it.
A sample with a small buffer:
(defn thread-name []
(.getName (Thread/currentThread)))
(require '[clojure.core.async :as async :refer [chan <! >! >!! go]])
(defn p [& args]
(locking *out*
(apply println (thread-name) ":" args)))
(defn log [v]
(p "Transforming" v)
v)
(def tx (map log))
(def c (chan 1 tx))
(def c2 (chan 1 tx))
(go
(loop []
(when-let [v (<! c)]
(p "Getting from c1" v)
(<! (async/timeout 100))
(recur))))
(go
(loop []
(when-let [v (<! c2)]
(p "Getting from c2" v)
(<! (async/timeout 100))
(recur))))
(dotimes [_ 5]
(p "Putting in c1" 1)
(>!! c 1)
(p "Putting in c2" 100)
(>!! c2 100))
Produces the output:
nREPL-worker-20 : Transforming 1
nREPL-worker-20 : Putting in c2 100
async-dispatch-33 : Getting from c1 1
nREPL-worker-20 : Transforming 100
nREPL-worker-20 : Putting in c1 1
async-dispatch-31 : Getting from c2 100
nREPL-worker-20 : Transforming 1
nREPL-worker-20 : Putting in c2 100
nREPL-worker-20 : Transforming 100
nREPL-worker-20 : Putting in c1 1
async-dispatch-35 : Getting from c2 100
async-dispatch-34 : Transforming 1 <---- In this case is run in the reading side
async-dispatch-34 : Getting from c1 1
nREPL-worker-20 : Putting in c2 100
nREPL-worker-20 : Transforming 100
async-dispatch-37 : Getting from c2 100
async-dispatch-36 : Getting from c1 1
nREPL-worker-20 : Putting in c1 1

Related

Why the program runs endlessly?

Why the program runs endlessly?
(defn lazycombine
([s] (lazycombine s []))
([s v] (let [a (take 1 s)
b (drop 1 s)]
(if (= a :start)
(lazy-seq (lazycombine b v))
(if (= a :end)
(lazy-seq (cons v (lazycombine b [])))
(lazy-seq (lazycombine b (conj v a))))))))
(def w '(:start 1 2 3 :end :start 7 7 :end))
(lazycombine w)
I need a function that returns a lazy sequence of elements by taking elements from another sequence of the form [: start 1 2: end: start: 5: end] and combining all the elements between: start and: end into a vector
You need to handle the termination condition - i.e. what should return when input s is empty?
And also the detection of :start and :end should use first instead of (take 1 s). And you can simplify that with destructuring.
(defn lazycombine
([s] (lazycombine s []))
([[a & b :as s] v]
(if (empty? s)
v
(if (= a :start)
(lazy-seq (lazycombine b v))
(if (= a :end)
(lazy-seq (cons v (lazycombine b [])))
(lazy-seq (lazycombine b (conj v a))))))))
(def w '(:start 1 2 3 :end :start 7 7 :end))
(lazycombine w)
;; => ([1 2 3] [7 7])
To reduce cyclomatic complexity a bit, you can use condp to replace couple if:
(defn lazycombine
([s] (lazycombine s []))
([[a & b :as s] v]
(if (empty? s)
v
(lazy-seq
(condp = a
:start (lazycombine b v)
:end (cons v (lazycombine b []))
(lazycombine b (conj v a)))))))
I would do it like so, using take-while:
(ns tst.demo.core
(:use tupelo.core tupelo.test))
(def data
[:start 1 2 3 :end :start 7 7 :end])
(defn end-tag? [it] (= it :end))
(defn start-tag? [it] (= it :start))
(defn lazy-segments
[data]
(when-not (empty? data)
(let [next-segment (take-while #(not (end-tag? %)) data)
data-next (drop (inc (count next-segment)) data)
segment-result (vec (remove #(start-tag? %) next-segment))]
(cons segment-result
(lazy-seq (lazy-segments data-next))))))
(dotest
(println "result: " (lazy-segments data)))
Running we get:
result: ([1 2 3] [7 7])
Note the contract when constructing a sequence recursively using cons (lazy or not). You must return either the next value in the sequence, or nil. Supplying nil to cons is the same as supplying an empty sequence:
(cons 5 nil) => (5)
(cons 5 []) => (5)
So it is convenient to use a when form to test the termination condition (instead of using if and returning an empty vector when the sequence must end).
Suppose we wrote the cons as a simple recursion:
(cons segment-result
(lazy-segments data-next))
This works great and produces the same result. The only thing the lazy-seq part does is to delay when the recursive call takes place. Because lazy-seq is a Clojure built-in (special form), it it is similar to loop/recur and does not consume the stack like ordinary recursion does . Thus, we can generate millions (or more) values in the lazy sequence without creating a StackOverflowError (on my computer, the default maximum stack size is ~4000). Consider the infinite lazy-sequence of integers beginning at 0:
(defn intrange
[n]
(cons n (lazy-seq (intrange (inc n)))))
(dotest
(time
(spyx (first (drop 1e6 (intrange 0))))))
Dropping the first million integers and taking the next one succeeds and requires only a few milliseconds:
(first (drop 1000000.0 (intrange 0))) => 1000000
"Elapsed time: 49.5 msecs"

lazy re-implementation of Clojure Interleaving

I want to rewrite (I'm not sure if the original implementation is lazy of not) a lazy implementation of clojure interleaving using lazy-seq that works like this :
(take 4 (lazy-interleave ’( 1 2 3) ’( a b c)))
(1 a 2 b)
I came up with something like this, but I'm not sure why it doesn't work:
(defn lazy-interleave [v1 v2]
(lazy-seq (concat (list (first v1) (first v2))) (lazy-interleave (next v1) (next v2)))
)
Edit:
Thanks to Arthur's answer, here is a modified working solution:
(defn lazy-interleave [v1 v2]
(lazy-seq
(cons (first v1) (cons (first v2) (lazy-interleave (rest v1) (rest v2))))
)
)
A bit of reformatting reveals the problem:
(defn lazy-interleave [v1 v2]
(lazy-seq
(concat (list (first v1) (first v2)))
(lazy-interleave (next v1) (next v2))))
In other words, you're constructing a lazy sequence that, when realized, will evaluate (concat (list (first v1) (first v2))), ignore the result, and then try to evaluate and return (lazy-interleave (next v1) (next v2)). This call to lazy-interleave does the same thing, again dropping the first elements of v1 and v2, and so on, ad infinitum.
You never get to the bottom because you have no empty check, and so since (next nil) returns nil, it just keeps going even after you exhaust both sequences. You don't get a StackOverflowError because you're using lazy sequences instead of recursion.
A correct implementation would look like this:
(defn lazy-interleave [v1 v2]
(when (and (seq v1) (seq v2))
(lazy-cat [(first v1) (first v2)]
(lazy-interleave (rest v1) (rest v2)))))
interleave is already lazy:
user> (interleave (take 5 (iterate #(do (println "sequence A:" %) (inc %)) 0))
(take 5 (iterate #(do (println "sequence B:" %) (inc %)) 100)))
sequence A: 0
sequence B: 100
sequence A: 1
sequence B: 101
sequence A: 2
sequence B: 102
sequence A: 3
sequence B: 103
(0 100 1 101 2 102 3 103 4 104)
user> (take 4 (interleave (take 5 (iterate #(do (println "sequence A:" %) (inc %)) 0))
(take 5 (iterate #(do (println "sequence B:" %) (inc %)) 100))))
sequence A: 0
sequence B: 100
(0 100 1 101)
And the core of it's implementation looks much like your example:
(lazy-seq
(let [s1 (seq c1) s2 (seq c2)]
(when (and s1 s2)
(cons (first s1) (cons (first s2)
(interleave (rest s1) (rest s2)))))))
Except it also works on more than two sequences, so it has another arity that handles that case.

Throttle Functions with core.async

The number of possible executions of a function should be throttled. So after calling a function, any repeated call should be ignored within a time period. If there where calls in the meantime, the last one should be executed after the time period.
Here's my approach with core.async. The problem here is, that additional calls are summing up in the channel c. I'd need a channel with only one position inside, which will be overridden by put! everytime.
(defn throttle [f time]
(let [c (chan 1)]
(go-loop []
(apply f (<! c))
(<! (timeout time))
(recur))
(fn [& args]
(put! c (if args args [])))))
usage:
(def throttled (throttle #(print %) 4000))
(doseq [x (range 10)]
(throttled x))
; 0
;... after 4 seconds
; 9
Does anyone have an idea how to fix this?
Solution
(defn throttle [f time]
(let [c (chan (sliding-buffer 1))]
(go-loop []
(apply f (<! c))
(<! (timeout time))
(recur))
(fn [& args]
(put! c (or args [])))))
To solve your channel question you can use a chan with a sliding buffer:
user> (require '[clojure.core.async :as async])
nil
user> (def c (async/chan (async/sliding-buffer 1)))
#'user/c
user> (async/>!! c 1)
true
user> (async/>!! c 2)
true
user> (async/>!! c 3)
true
user> (async/<!! c)
3
that way only the last value put into the channel will be computed at the next interval.
You can use a debounce function.
I'll copy it out here:
(defn debounce [in ms]
(let [out (chan)]
(go-loop [last-val nil]
(let [val (if (nil? last-val) (<! in) last-val)
timer (timeout ms)
[new-val ch] (alts! [in timer])]
(condp = ch
timer (do (>! out val) (recur nil))
in (recur new-val))))
out))
Here only when in has not emitted a message for ms is the last value it emitted forwarded onto the out channel. While in continues to emit without a long enough pause between emits then all-but-the-last-message are continuously discarded.
I've tested this function. It waits 4 seconds and then prints out 9, which is nearly what you asked for - some tweaking required!
(defn my-sender [to-chan values]
(go-loop [[x & xs] values]
(>! to-chan x)
(when (seq xs) (recur xs))))
(defn my-receiver [from-chan f]
(go-loop []
(let [res (<! from-chan)]
(f res)
(recur))))
(defn setup-and-go []
(let [in (chan)
ch (debounce in 4000)
sender (my-sender in (range 10))
receiver (my-receiver ch #(log %))]))
And this is the version of debounce that will output as required by the question, which is 0 immediately, then wait four seconds, then 9:
(defn debounce [in ms]
(let [out (chan)]
(go-loop [last-val nil
first-time true]
(let [val (if (nil? last-val) (<! in) last-val)
timer (timeout (if first-time 0 ms))
[new-val ch] (alts! [in timer])]
(condp = ch
timer (do (>! out val) (recur nil false))
in (recur new-val false))))
out))
I've used log rather than print as you did. You can't rely on ordinary println/print functions with core.async. See here for an explanation.
This is taken from David Nolens blog's source code:
(defn throttle*
([in msecs]
(throttle* in msecs (chan)))
([in msecs out]
(throttle* in msecs out (chan)))
([in msecs out control]
(go
(loop [state ::init last nil cs [in control]]
(let [[_ _ sync] cs]
(let [[v sc] (alts! cs)]
(condp = sc
in (condp = state
::init (do (>! out v)
(>! out [::throttle v])
(recur ::throttling last
(conj cs (timeout msecs))))
::throttling (do (>! out v)
(recur state v cs)))
sync (if last
(do (>! out [::throttle last])
(recur state nil
(conj (pop cs) (timeout msecs))))
(recur ::init last (pop cs)))
control (recur ::init nil
(if (= (count cs) 3)
(pop cs)
cs)))))))
out))
(defn throttle-msg? [x]
(and (vector? x)
(= (first x) ::throttle)))
(defn throttle
([in msecs] (throttle in msecs (chan)))
([in msecs out]
(->> (throttle* in msecs out)
(filter #(and (vector? %) (= (first %) ::throttle)))
(map second))))
You probably also want to add a dedupe transducer to the channel.
I needed to pass a function to capture the args because I was using it for an input event and it was passing a mutable object. 🤷
(defn throttle-for-mutable-args [time f arg-capture-fn]
(let [c (async/chan (async/sliding-buffer 1))]
(async-m/go-loop []
(f (async/<! c))
(async/<! (async/timeout time))
(recur))
(fn [& args]
(async/put! c (apply arg-capture-fn (or args []))))))
And I use like
[:input
{:onChange (util/throttle-for-mutable-args
500
#(really-use-arg %)
#(-> % .-target .-value))}]

How to create a channel from another with transducers?

I want to create a channel of clojure.core.async from another one that just filters specific messages. Therefore I found a function called filter<.
=> (def c1 (chan))
=> (def c2 (filter< even? c1))
=> (put! c1 1)
=> (put! c1 2)
=> (<!! c2)
2
But the function and its friends are marked as deprecated:
Deprecated - this function will be removed. Use transducer instead
There are some ways to use channels with transducer like chan with the xform parameter. How can I build a new channel from an existing one using transducers?
I did some research on this, found a couple of interesting articles (first and second), and then got something working using pipeline
(require '[clojure.core.async :as async :refer [chan <!! pipeline put!]])
(def c1 (chan))
(def c2 (chan))
(pipeline 4 c2 (filter even?) c1)
(put! c1 1)
(put! c1 2)
(<!! c2)
;;=> 2
The second article I linked makes this a bit cleaner with some helper functions around the pipeline function:
(defn ncpus []
(.availableProcessors (Runtime/getRuntime)))
(defn parallelism []
(+ (ncpus) 1))
(defn add-transducer
[in xf]
(let [out (chan (buffer 16))]
(pipeline (parallelism) out xf in)
out))
Then you can simply tie channels together with
(def c1 (chan))
(def c2 (add-transducer c1 (filter even?))
To complete the answer, as you found yourself you can use pipe in a similar fashion:
(defn pipe-trans
[ci xf]
(let [co (chan 1 xf)]
(pipe ci co)
co))
(def c1 (chan))
(def c2 (pipe-trans c1 (filter even?)))

Clojure: map function with updatable state

What is the best way of implementing map function together with an updatable state between applications of function to each element of sequence? To illustrate the issue let's suppose that we have a following problem:
I have a vector of the numbers. I want a new sequence where each element is multiplied by 2 and then added number of 10's in the sequence up to and including the current element. For example from:
[20 30 40 10 20 10 30]
I want to generate:
[40 60 80 21 41 22 62]
Without adding the count of 10 the solution can be formulated using a high level of abstraction:
(map #(* 2 %) [20 30 40 10 20 10 30])
Having count to update forced me to "go to basic" and the solution I came up with is:
(defn my-update-state [x state]
(if (= x 10) (+ state 1) state)
)
(defn my-combine-with-state [x state]
(+ x state))
(defn map-and-update-state [vec fun state update-state combine-with-state]
(when-not (empty? vec)
(let [[f & other] vec
g (fun f)
new-state (update-state f state)]
(cons (combine-with-state g new-state) (map-and-update-state other fun new-state update-state combine-with-state))
)))
(map-and-update-state [20 30 40 50 10 20 10 30 ] #(* 2 %) 0 my-update-state my-combine-with-state )
My question: is it the appropriate/canonical way to solve the problem or I overlooked some important concepts/functions.
PS:
The original problem is walking AST (abstract syntax tree) and generating new AST together with updating symbol table, so when proposing the solution to the problem above please keep it in mind.
I do not worry about blowing up stack, so replacement with loop+recur is not
my concern here.
Is using global Vars or Refs instead of passing state as an argument a definite no-no?
You can use reduce to accumulate a pair of the number of 10s seen so far and the current vector of results.:
(defn map-update [v]
(letfn [(update [[ntens v] i]
(let [ntens (+ ntens (if (= 10 i) 1 0))]
[ntens (conj v (+ ntens (* 2 i)))]))]
(second (reduce update [0 []] v))))
To count # of 10 you can do
(defn count-10[col]
(reductions + (map #(if (= % 10) 1 0) col)))
Example:
user=> (count-10 [1 2 10 20 30 10 1])
(0 0 1 1 1 2 2)
And then a simple map for the final result
(map + col col (count-10 col)))
Reduce and reductions are good ways to traverse a sequence keeping a state. If you feel your code is not clear you can always use recursion with loop/recur or lazy-seq like this
(defn twice-plus-ntens
([coll] (twice-plus-ntens coll 0))
([coll ntens]
(lazy-seq
(when-let [s (seq coll)]
(let [item (first s)
new-ntens (if (= 10 item) (inc ntens) ntens)]
(cons (+ (* 2 item) new-ntens)
(twice-plus-ntens (rest s) new-ntens)))))))
have a look at map source code evaluating this at your repl
(source map)
I've skipped chunked optimization and multiple collection support.
You can make it a higher-order function this way
(defn map-update
([mf uf coll] (map-update mf uf (uf) coll))
([mf uf val coll]
(lazy-seq
(when-let [s (seq coll)]
(let [item (first s)
new-status (uf item val)]
(cons (mf item new-status)
(map-update mf uf new-status (rest s))))))))
(defn update-f
([] 0)
([item status]
(if (= item 10) (inc status) status)))
(defn map-f [item status]
(+ (* 2 item) status))
(map-update map-f update-f in)
The most appropriate way is to use function with state
(into
[]
(map
(let [mem (atom 0)]
(fn [val]
(when (== val 10) (swap! mem inc))
(+ #mem (* val 2)))))
[20 30 40 10 20 10 30])
also see
memoize
standard function