Elixir: Finding Prime Numbers with Workers, Only 1 CPU Used - concurrency

I'm learning Elixir and decided to write a demo application using GenServer and workers to find prime numbers.
Application
Setup
I created my application as follows:
$ mix new primes
$ cd primes
Structure
$ tree lib
lib/
├── prime_numbers.ex
├── prime_server.ex
└── prime_worker.ex
lib/prime_numbers.ex
This is the module that performs the actual logic to figure out whether or not a given number is a prime number (is_prime/1):
defmodule PrimeNumbers do
def is_prime(x) when is_number(x) and x == 2, do: true
def is_prime(x) when is_number(x) and x > 2 do
from = 2
to = trunc(:math.sqrt(x))
n_total = to - from + 1
n_tried =
Enum.take_while(from..to, fn i -> rem(x, i) != 0 end)
|> Enum.count()
n_total == n_tried
end
def is_prime(x) when is_number(x), do: false
end
lib/prime_worker.ex
This is a worker process that uses the PrimeNumbers module (is_prime/1):
defmodule PrimeWorker do
defstruct number: -1, handled: 0
use GenServer
def start(number) do
GenServer.start(__MODULE__, number)
end
def is_prime(pid, x) do
GenServer.call(pid, {:is_prime, x})
end
def stats(pid) do
GenServer.call(pid, {:stats})
end
def init(number) do
{:ok, %PrimeWorker{number: number, handled: 0}}
end
def handle_call({:is_prime, x}, _, state) do
result = PrimeNumbers.is_prime(x)
{:reply, result, %PrimeWorker{state | handled: state.handled + 1}}
end
def handle_call({:stats}, _, state) do
{:reply, state.handled, state}
end
end
It also provides statistics (stats/1) about the amount of requests it handled.
lib/prime_server.ex
This is the server that creates and manages the workers, and provides them for individual computations (is_prime/1):
defmodule PrimeServer do
defstruct n_workers: 0, workers: %{}
use GenServer
def start(n_workers) when is_number(n_workers) and n_workers > 0 do
GenServer.start(__MODULE__, n_workers)
end
def is_prime(pid, x) do
GenServer.call(pid, {:is_prime, x})
end
def worker_stats(pid) do
GenServer.call(pid, {:worker_stats})
end
def init(n_workers) do
workers =
for i <- 0..(n_workers - 1),
into: %{},
do: {i, (fn {:ok, worker} -> worker end).(PrimeWorker.start(i))}
{:ok, %PrimeServer{n_workers: n_workers, workers: workers}}
end
def handle_call({:is_prime, x}, caller, state) do
spawn(fn ->
i_worker = rem(x, state.n_workers)
worker = Map.get(state.workers, i_worker)
GenServer.reply(caller, PrimeWorker.is_prime(worker, x))
end)
{:noreply, state}
end
def handle_call({:worker_stats}, _, state) do
stats =
state.workers
|> Enum.map(fn {i, pid} ->
stats = PrimeWorker.stats(pid)
{i, stats}
end)
{:reply, stats, state}
end
end
It provides overall worker stats (worker_stats/1) to demonstrate that the work performed was actually distributed over multiple processes.
primes.exs
This is a test script to run the computation:
args = System.argv()
[n, procs | _] = args
{n, ""} = Integer.parse(n, 10)
{procs, ""} = Integer.parse(procs, 10)
{:ok, pid} = PrimeServer.start(procs)
1..n
|> Stream.filter(fn i -> PrimeServer.is_prime(pid, i) end)
|> Enum.each(fn i -> IO.puts("#{i} is a prime number.") end)
PrimeServer.worker_stats(pid)
|> Enum.each(fn {k, v} -> IO.puts("Worker #{k} handled #{v} jobs.") end)
It can be run as follows, indicating both the upper limit of the numbers to be checked, and the number of workers to be spawned:
$ mix run primes.exs 20 3
2 is a prime number.
5 is a prime number.
7 is a prime number.
11 is a prime number.
13 is a prime number.
17 is a prime number.
19 is a prime number.
Worker 0 handled 6 jobs.
Worker 1 handled 7 jobs.
Worker 2 handled 7 jobs.
The whole project can be found on GitHub
Problem
This all works fine. However, I want the program to make use of my 8 CPU cores. But that does not happen:
$ mix run primes.exs 1000000 8 &
$ htop
Why isn't the work parallelized?
Versions Used
Elixir
$ elixir --version
Erlang/OTP 24 [erts-12.2.1] [source] [64-bit] [smp:8:8] [ds:8:8:10] [async-threads:1] [jit]
Elixir 1.13.2 (compiled with Erlang/OTP 24)
Mix
$ mix --version
Erlang/OTP 24 [erts-12.2.1] [source] [64-bit] [smp:8:8] [ds:8:8:10] [async-threads:1] [jit]
Mix 1.13.2 (compiled with Erlang/OTP 24)

I figured out a way to really make my CPU cores work. Now I use concurrency primitives instead of GenServer (prime_workers.exs):
defmodule PrimeNumbers do
def is_prime(x) when is_number(x) and x == 2, do: true
def is_prime(x) when is_number(x) and x > 2 do
from = 2
to = trunc(:math.sqrt(x))
n_total = to - from + 1
n_tried =
Enum.take_while(from..to, fn i -> rem(x, i) != 0 end)
|> Enum.count()
n_total == n_tried
end
def is_prime(x) when is_number(x), do: false
end
defmodule PrimeWorker do
def start() do
spawn(fn -> loop() end)
end
defp loop() do
receive do
{:is_prime, x, pid} ->
send(pid, {:prime_result, x, PrimeNumbers.is_prime(x)})
loop()
{:terminate, pid} ->
send(pid, {:done})
end
end
end
defmodule PrimeClient do
def start() do
spawn(fn -> loop(0) end)
end
def loop(found) do
receive do
{:prime_result, _, prime} ->
if prime do
loop(found + 1)
else
loop(found)
end
{:query_primes, pid} ->
send(pid, {:primes_found, found})
end
loop(found)
end
end
args = System.argv()
[n, n_workers | _] = args
{n, ""} = Integer.parse(n, 10)
{n_workers, ""} = Integer.parse(n_workers, 10)
client = PrimeClient.start()
workers =
for i <- 0..(n_workers - 1),
into: %{},
do: {i, PrimeWorker.start()}
Enum.each(2..n, fn x ->
i_worker = rem(x, n_workers)
worker = Map.get(workers, i_worker)
send(worker, {:is_prime, x, client})
end)
workers
|> Enum.each(fn {_, w} ->
send(w, {:terminate, self()})
receive do
{:done} -> {:nothing}
end
end)
send(client, {:query_primes, self()})
receive do
{:primes_found, found} ->
IO.puts("Found #{found} primes from 2 to #{n}.")
end
Now my CPU cores are properly used:
$ time elixir prime_workers.exs 1000000 1000
Found 78497 primes from 2 to 1000000.
real 0m1.761s
user 0m10.327s
sys 0m0.346s

Related

Trim an infinite list of random numbers

I'm trying to trim a list of random numbers so that sum of the numbers in [0,1] in the resulting smaller list accumultates to a value under 1.
This is interesting in a sense that average of these list prefix lengths is e, somehow.
While getting the length of prefix I encountered a problem - I managed to get the program to work on a determined infinite list, or a slice of random list, but the program hangs on infinite random list. What am I doing wrong?
import System.Random
-- Count list items that accumulate to 1.
count :: (Num a, Ord a) => [a] -> Int
count xs = 1 + length xs'
where xs'= takeWhile (< 1) $ scanl1 (+) xs
-- Works of infinite list
a = (return (repeat 0.015)) :: IO [Double]
fa = fmap count a
--67
-- Works on trimmed list of randoms
rio = randomRIO (0::Double, 1)
b = sequence $ take 10 (repeat rio)
fb = fmap count b
-- fb is usually 2 to 5
-- Hangs on infinite list of randoms
c = sequence (repeat rio)
fc = fmap count c
-- fc hangs... ;(
You can define an IO action to create an infinite stream of ranged random numbers like so:
import System.Random
randomRIOs :: (Random a) => (a, a) -> IO [a]
randomRIOs (a, b) = randomRs (a, b) <$> newStdGen
After which, the following works fine with your definition of count:
main = do
n <- count <$> randomRIOs (0::Double, 1)
print n
You can't really have an infinite list of random numbers, because randomness is too strict. So, you can't have the call to sequence outside of your call to count. One obvious thing you could try would be a partial reimplementation of sequence inside of count itself:
count :: (Num a, Ord a, Monad m) => [m a] -> m (Maybe Int)
count = go 0 0
where go n total [] = pure Nothing
go n total (x:xs) = do
num <- x
let total' = total + num
n' = succ n
if total' >= 1
then pure $ Just n'
else go n' total' xs
I've also amended the result to return a Maybe, because it seems wrong to return 1 for an empty list (as yours did), or return the length of a list even if its elements sum to something less than 1.
Another reasonable change would be to not accept [m a], but just a single m a, which can reliably produce a value any number of times. Then we don't have to worry about the input running out, and so no Maybe is needed:
count :: (Num a, Ord a, Monad m) => m a -> m Int
count m = go 0 0
where go n total = do
num <- m
let total' = total + num
n' = succ n
if total' >= 1
then pure n'
else go n' total'
Then you can simply call count $ randomRIO (0::Double, 1), and it will produce as many random numbers as are needed.

Return the n-th element of a list

I want to find the n-th element of my list[1,2,3]. I want to choose element 2, using recursion.
I have already found the length using:
def module T do
def main do
len([1,2,3])
end
def len([]), do: 0
def len([h|t]), do: 1+len(t)
end
There's def a library function for that (Enum.at/2), but if you want your own implementation:
# Little extra to handle negative positions (from the end of the list)
# element_at([1,2,3], -1) == 3
def element_at(list, n) when n < 0 do
element_at(Enum.reverse(list), -n - 1)
end
def element_at([], _), do: nil
def element_at([h | _t], 0), do: h
def element_at([_h | t], n), do: element_at(t, n - 1)
I suppose you want to implement it by yourself. Otherwise there are a lot of functions you my use in the Enum and List modules.
defmodule Test do
def nth_element(list, n) when is_list(list) and is_integer(n) do
case {list, n} do
{[], _} -> nil
{[element | _], 0} -> element
{[_ | rest], _} -> nth_element(rest, n - 1)
end
end
def nth_element(_, _), do: nil
end
Argument n of the nth_element function is the index (it starts with 0) of the element you are looking for. If the list is empty or it has no element at the given index, it returns nil. Otherwise the function uses pattern matching to get an element and the rest of the list. If you are looking for the first element (index = n = 0) then it will return the element. If you are looking for an element at a higher index, the function will be called recursively with the rest of the list and the given index minus one element. This way you find the element you want in the recursion if n == 0. In the case the list is empty but n >= 0 the recursion ends, because of the first function definition and it returns nil.
I saved the module in test.ex. Execution:
$ iex
iex(1)> c "test.ex"
iex(2)> Test.nth_element([1,2,5], 0)
# => 1
iex(3)> Test.nth_element([1,2,5], 1)
# => 2
iex(4)> Test.nth_element([1,2,5], 2)
# => 5
iex(5)> Test.nth_element([1,2,5], 3)
# => nil

Concurrent Sieve in Erlang

I have a code that uses the Sieve of Eratosthenes method to generate prime numbers up to a given limit N.
Method:
Split the list of odd numbers into segments
Each segment gets passed to a process
Segments are sieved concurrently with the set Lp
Here is the code:
%---------------------------------------------------------------------------------
primes(N) ->
simple_sieve(lists:seq(3,N,2),[],N).
simple_sieve(Lin,Lp,N) -> [H|_] = Lin,
case H*H < N of
true -> simple_sieve([X || X <- Lin, X rem H =/= 0], [H|Lp], N);
false -> lists:reverse(Lp) ++ Lin
end.
%---------------------------------------------------------------------------------
primes_parr(N, Num_blocks) ->
Pid_stem = self(),
SQ = round(math:sqrt(N)),
Lp = primes(SQ), io:fwrite("List of primes: ~w~n", [Lp]),
Block_size = round(N/Num_blocks),
ok = leaf_spawn(Pid_stem, Lp, SQ, Block_size, Num_blocks),
stem_loop(Lp, 0, Num_blocks).
stem_loop(Lp, Primes, 0) ->
1 + length(Lp) + Primes;
stem_loop(Lp, Primes, Num_blocks) ->
receive
{leaf_done, _, Leaf_nums} ->
stem_loop(Lp, Primes+Leaf_nums, Num_blocks-1)
end.
leaf_spawn(_, _, _, _, 0) -> ok;
leaf_spawn(Pid_stem, Lp, SQ, Block_size, Num_blocks) ->
case (Num_blocks==1) of
true -> case (SQ rem 2 == 0) of
true -> Start = SQ+1;
false -> Start = SQ
end;
false -> Start = 1
end,
First = (Num_blocks-1)*Block_size + Start,
Last = Num_blocks*Block_size,
io:fwrite("Start: ~w | Finish: ~w ~n", [First,Last]),
spawn(fun() -> leaf(Pid_stem, Num_blocks, First, Last, [], Lp) end),
leaf_spawn(Pid_stem, Lp, SQ, Block_size, Num_blocks-1).
leaf(Pid_stem, Leaf_id, First, Last, Leaf_nums, []) ->
L = ordsets:subtract(lists:seq(First,Last,2),lists:usort(Leaf_nums)),
io:fwrite("The sublist is: ~w~n", [L]),
Pid_stem ! {leaf_done, Leaf_id, length(ordsets:subtract(lists:seq(First,Last,2),lists:usort(Leaf_nums)))};
leaf(Pid_stem, Leaf_id, First, Last, Leaf_nums, [H|T]) ->
case (H*H =< Last) of
true ->
case H*H >= First of
true ->
leaf(Pid_stem, Leaf_id, First, Last, lists:seq(H*H, Last, 2*H) ++ Leaf_nums, T);
false ->
K = round((First - H*H)/(2*H)),
leaf(Pid_stem, Leaf_id, First, Last, lists:seq(H*H + 2*K*H, Last, 2*H) ++ Leaf_nums, T)
end;
false ->
leaf(Pid_stem, Leaf_id, First, Last, Leaf_nums, [])
end.
If I call the function primes_parr(100,2), the code works just fine. Giving me the output:
List of primes(Lp): [3,5,7]
Start: 51 | Finish: 100
Start: 11 | Finish: 50
The sublist is: [53,59,61,67,71,73,79,83,89,97]
The sublist is: [11,13,17,19,23,29,31,37,41,43,47]
25 %no. of primes
But if I call primes_parr(100,3), the output becomes invalid. With the output being:
List of primes(Lp): [3,5,7]
Start: 67 | Finish: 99
Start: 34 | Finish: 66
Start: 11 | Finish: 33
The sublist is: [67,71,73,79,83,89,97]
The sublist is: [34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66]
The sublist is: [11,13,17,19,23,29,31]
35 %no. of primes
I'd like to know what's causing the error if I split the list into more than two segments.
Judging by your invalid output
The sublist is: [67,71,73,79,83,89,97]
The sublist is: [34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66] %% HERE
The sublist is: [11,13,17,19,23,29,31]
somewhere your code assumes that a starting number of a range to sieve is odd.
Indeed,
leaf(Pid_stem, Leaf_id, First, Last, Leaf_nums, []) ->
L = ordsets:subtract(lists:seq(First,Last,2),lists:usort(Leaf_nums)),
io:fwrite("The sublist is: ~w~n", [L]),
Pid_stem ! {leaf_done, Leaf_id, length(L)}; %% reuse the L !!
assumes that First is odd, when it calculates lists:seq(First,Last,2).
This should be simple to fix. Just add 1 to First if it's even, immediately after you've calculated it inside leaf_spawn. (edit: better do it here, on entry into leaf, so its requirement is clearly seen and is enforced by it itself).
Also, sometimes your biggest prime in Lp is also included as the first prime in the lowest block (e.g. for N=121, N=545).

Better way to solve this [Int] -> Int -> Int

Here is an sample problem I'm working upon:
Example Input: test [4, 1, 5, 6] 6 returns 5
I'm solving this using this function:
test :: [Int] -> Int -> Int
test [] _ = 0
test (x:xs) time = if (time - x) < 0
then x
else test xs $ time - x
Any better way to solve this function (probably using any inbuilt higher order function) ?
How about
test xs time = maybe 0 id . fmap snd . find ((>time) . fst) $ zip sums xs
where sums = scanl1 (+) xs
or equivalently with that sugary list comprehension
test xs time = headDef 0 $ [v | (s, v) <- zip sums xs, s > time]
where sums = scanl1 (+) xs
headDef is provided by safe. It's trivial to implement (f _ (x:_) = x; f x _ = x) but the safe package has loads of useful functions like these so it's good to check out.
Which sums the list up to each point and finds the first occurence greater than time. scanl is a useful function that behaves like foldl but keeps intermediate results and zip zips two lists into a list of tuples. Then we just use fmap and maybe to manipulate the Maybe (Integer, Integer) to get our result.
This defaults to 0 like yours but I like the version that simply goes to Maybe Integer better from a user point of view, to get this simply remove the maybe 0 id.
You might like scanl and its close relative, scanl1. For example:
test_ xs time = [curr | (curr, tot) <- zip xs (scanl1 (+) xs), tot > time]
This finds all the places where the running sum is greater than time. Then you can pick the first one (or 0) like this:
safeHead def xs = head (xs ++ [def])
test xs time = safeHead 0 (test_ xs time)
This is verbose, and I don't necessarily recommend writing such a simple function like this (IMO the pattern matching & recursion is plenty clear). But, here's a pretty declarative pipeline:
import Control.Error
import Data.List
deadline :: (Num a, Ord a) => a -> [a] -> a
deadline time = fromMaybe 0 . findDeadline time
findDeadline :: (Num a, Ord a) => a -> [a] -> Maybe a
findDeadline time xs = decayWithDifferences time xs
>>= findIndex (< 0)
>>= atMay xs
decayWithDifferences :: Num b => b -> [b] -> Maybe [b]
decayWithDifferences time = tailMay . scanl (-) time
-- > deadline 6 [4, 1, 5, 6]
-- 5
This documents the code a bit and in principle lets you test a little better, though IMO these functions fit more-or-less into the 'obviously correct' category.
You can verify that it matches your implementation:
import Test.QuickCheck
prop_equality :: [Int] -> Int -> Bool
prop_equality time xs = test xs time == deadline time xs
-- > quickCheck prop_equality
-- +++ OK, passed 100 tests.
In this particular case zipping suggested by others in not quite necessary:
test xs time = head $ [y-x | (x:y:_) <- tails $ scanl1 (+) $ 0:xs, y > time]++[0]
Here scanl1 will produce a list of rolling sums of the list xs, starting it with 0. Therefore, tails will produce a list with at least one list having two elements for non-empty xs. Pattern-matching (x:y:_) extracts two elements from each tail of rolling sums, so in effect it enumerates pairs of neighbouring elements in the list of rolling sums. Filtering on the condition, we reconstruct a part of the list that starts with the first element that produces a rolling sum greater than time. Then use headDef 0 as suggested before, or append a [0], so that head always returns something.
If you want to retain readability, I would just stick with your current solution. It's easy to understand, and isn't doing anything wrong.
Just because you can make it into a one line scan map fold mutant doesn't mean that you should!

iterate list creation from IO Int, How to?

I am playing with linkedlist problem in python challenge that require querying a next value (guess it be Int).
I create function for get the next value as follows
url = "http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing="
getNext :: Int -> IO Int
getNext x = do
rsp <- simpleHTTP (getRequest $ url ++ show x)
bdy <- getResponseBody rsp
let num = last $ splitWhen (==' ') bdy
return (read num::Int)
and it work fine (in ghci)
> getNext 12345
44827
> getNext 44827
45439
While I suppose to repeatedly call getNext until I found the answer, I think I should keep the history like I can do in non-monadic world so I can continue from the last value in case something fail.
> let nX x = x + 3
> :t nX
nX :: Num a => a -> a
> take 10 $ iterate nX 1
[1,4,7,10,13,16,19,22,25,28]
I think it should be a monadic lifted version of iterate and found iterateM_ from Control.Monad.Loops but it didn't work as I expected. There is nothing shown (I think _ suffix mean discard the result but there is no iterateM)
> :t iterate
iterate :: (a -> a) -> a -> [a]
> :t iterateM_
iterateM_ :: Monad m => (a -> m a) -> a -> m b
Question is how can I get [Int] as in non-monadic iteration. I think I want a function that return IO [Int] to be able to pull-out and filter/process in my code like this
main = do
i <- getAllList
let answer = last i -- or could be a repeated converged value, don't know yet
putStrLn (show answer)
getAllList :: IO [Int]
If you want your function to terminate early, rather than give back an
infinite list of results, you will want to use unfoldrM rather than
iterateM. This can be done with something like the following:
url = "http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing="
start = 12345
stop = 10000
shouldStop :: Int -> Bool
shouldStop x = x == stop
getNext :: Int -> IO (Maybe (Int, Int))
getNext prev
| shouldStop prev = return Nothing
| otherwise = do
rsp <- simpleHTTP (getRequest $ url ++ show prev)
bdy <- getResponseBody rsp
let num = read $ last $ splitWhen (==' ') bdy :: Int
print (prev, num)
return $ Just (num, num)
getAllList :: IO [Int]
getAllList = unfoldrM getNext start
This will allow you to define a stopping criteria so that the loop can
terminate, but you will not receive results back until the termination
criteria has been met.
The unfoldrM function can be found in the monad-loops package, but the
latest version keeps reusing the original seed rather than the one produced by
the generator function (I believe this has been fixed but not uploaded to
Hackage). This is the version of unfoldrM that you would want.
-- |See 'Data.List.unfoldr'. This is a monad-friendly version of that.
unfoldrM :: (Monad m) => (a -> m (Maybe (b,a))) -> a -> m [b]
unfoldrM = unfoldrM'
-- |See 'Data.List.unfoldr'. This is a monad-friendly version of that, with a
-- twist. Rather than returning a list, it returns any MonadPlus type of your
-- choice.
unfoldrM' :: (Monad m, MonadPlus f) => (a -> m (Maybe (b,a))) -> a -> m (f b)
unfoldrM' f z = go z
where go z = do
x <- f z
case x of
Nothing -> return mzero
Just (x, z) -> do
xs <- go z
return (return x `mplus` xs)
This is how you might go about this using Pipes, which will allow you to
do the processing as a stream of results without resorting to lazy I/O.
import Network.HTTP
import Control.Monad
import Data.List.Split
import Control.Monad
import Control.Proxy
url = "http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing="
grabber :: (Proxy p) => Int -> () -> Producer p String IO ()
grabber start () = runIdentityP $ loop $ show start where
loop x = do
-- Grab the next value
x' <- lift $ getNext x
-- Send it down stream
respond x'
-- Keep grabbing
loop x'
-- Just prints the values recieved from up stream
printer :: (Proxy p, Show a) => () -> Consumer p a IO r
printer () = runIdentityP $ forever $ do
a <- request () -- Consume a value
lift $ putStrLn $ "Received a value: " ++ show a
getNext :: String -> IO String
getNext prev = do
rsp <- simpleHTTP (getRequest $ url ++ prev)
bdy <- getResponseBody rsp
let num = last $ splitWhen (== ' ') bdy
return num
main = runProxy $ grabber start >-> printer
So what you want is basically
iterateM :: Monad m => (a -> m a) -> a -> m [a]
iterateM action a = do
a' <- action a
liftM (a':) $ iterateM action a'
The problem is that this doesn't work lazily as one might expect: since the monadic bind is strict, you're stuck in an infinite loop, even if you only want to evaluate a finite number of as.