I am implementing a Twitter-like application in Erlang, with both a distributed and a non-distributed implementation. I am writing a benchmark, but I cannot find a way to send parallel requests to each user process in the distributed implementation. I am using lists:foreach to send "get tweets" to a list of client processes. My understanding is that lists:foreach visits each element of the list one at a time, which makes the behavior sequential and ultimately gives my distributed implementation the same execution time as the non-distributed one. Is it possible to send the "get tweets" requests to the different client processes all at once? This seems like a rather specific case, and it has been difficult to find a solution inside or outside StackOverflow.
test_get_tweets_Bench() ->
{ServerPid, UserInfos} = initializeForBench_server(),
run_benchmark("timeline",
fun () ->
lists:foreach(fun (_) ->
UserChoice = pick_random(UserInfos),
server:get_tweets(element(2, UserChoice), element(1, UserChoice), 1)
end,
lists:seq(1, 10000))
end,
30).
pick_random(List) ->
lists:nth(rand:uniform(length(List)), List).
UserInfos is a list of the following form: [{UserId, ClientProcess}, ...]
After trying rpc:pmap instead of the lists:foreach, my benchmark has become approximately 3 times slower. The changes are as follows:
test_get_tweets_Bench2() ->
{ServerPid, UserInfos} = initializeForBench_server(),
run_benchmark("get_tweets 2",
fun () ->
rpc:pmap({?MODULE,do_apply},
[fun (_) ->
UserChoice = pick_random(UserInfos),
server:get_tweets(element(2, UserChoice), element(1, UserChoice), 1)
end],
lists:seq(1, 10000))
end,
30).
pick_random(List) ->
lists:nth(rand:uniform(length(List)), List).
do_apply(X,F)->
F(X).
I thought rpc:pmap would make my benchmark faster as it would send the get_tweet requests in parallel.
Below is my server module, which is the API between my benchmark and my Twitter-like application: it forwards the requests from the benchmark to the application.
%% This module provides the protocol that is used to interact with an
%% implementation of a microblogging service.
%%
%% The interface is designed to be synchronous: it waits for the reply of the
%% system.
%%
%% This module defines the public API that is supposed to be used for
%% experiments. The semantics of the API here should remain unchanged.
-module(server).
-export([register_user/1,
subscribe/3,
get_timeline/3,
get_tweets/3,
tweet/3]).
%%
%% Server API
%%
% Register a new user. Returns its id and a pid that should be used for
% subsequent requests by this client.
-spec register_user(pid()) -> {integer(), pid()}.
register_user(ServerPid) ->
ServerPid ! {self(), register_user},
receive
{ResponsePid, registered_user, UserId} -> {UserId, ResponsePid}
end.
% Subscribe/follow another user.
-spec subscribe(pid(), integer(), integer()) -> ok.
subscribe(ServerPid, UserId, UserIdToSubscribeTo) ->
ServerPid ! {self(), subscribe, UserId, UserIdToSubscribeTo},
receive
{_ResponsePid, subscribed, UserId, UserIdToSubscribeTo} -> ok
end.
% Request a page of the timeline of a particular user.
% Request results can be 'paginated' to reduce the amount of data to be sent in
% a single response. This is up to the server.
-spec get_timeline(pid(), integer(), integer()) -> [{tweet, integer(), erlang:timestamp(), string()}].
get_timeline(ServerPid, UserId, Page) ->
ServerPid ! {self(), get_timeline, UserId, Page},
receive
{_ResponsePid, timeline, UserId, Page, Timeline} ->
Timeline
end.
% Request a page of tweets of a particular user.
% Request results can be 'paginated' to reduce the amount of data to be sent in
% a single response. This is up to the server.
-spec get_tweets(pid(), integer(), integer()) -> [{tweet, integer(), erlang:timestamp(), string()}].
get_tweets(ServerPid, UserId, Page) ->
ServerPid ! {self(), get_tweets, UserId, Page},
receive
{_ResponsePid, tweets, UserId, Page, Tweets} ->
Tweets
end.
% Submit a tweet for a user.
% (Authorization/security are not regarded in any way.)
-spec tweet(pid(), integer(), string()) -> erlang:timestamp().
tweet(ServerPid, UserId, Tweet) ->
ServerPid ! {self(), tweet, UserId, Tweet},
receive
{_ResponsePid, tweet_accepted, UserId, Timestamp} ->
Timestamp
end.
In Erlang, a message is sent from a process A to a process B. There is no built-in feature such as a broadcast or a selective broadcast. In your application I see 3 steps:
send a request to get the tweets from the users,
each user process prepares the answer and sends it back to the requester,
the initial process collects the answers.
Sending the requests to the user processes and collecting the tweets (steps 1 and 3) cannot be parallelized. Of course you could use multiple processes to send the requests and collect the answers, up to one per user, but I guess that is not the subject of your question.
What is feasible is to ensure that the 3 steps are not done in sequence for each user process, but in parallel. I guess that the function server:get_tweets both sends the request and collects the answer. If I am correct (I cannot know, since you don't provide that code and you ignore the returned values), you can introduce parallelism by splitting this function in two: the first part sends the requests, the second collects the answers. (Here is an example; I haven't tried or even compiled it, so treat it with care :o)
test_get_tweets_Bench() ->
{ServerPid, UserInfos} = initializeForBench_server(),
run_benchmark("timeline",
fun () ->
% send the requests
List = lists:map(fun (_) ->
{UserId,Pid} = pick_random(UserInfos),
Ref = server:request_tweets(Pid,UserId),
{Ref,UserId}
end,
lists:seq(1, 10000)),
% collects the answers
collect(List, [])
end,
30).
collect([],Result) -> {ok,Result};
collect(List,ResultSoFar) ->
receive
{Ref,UserId,Tweets} ->
{ok,NewList} = remove_pending_request(Ref,UserId,List),
collect(NewList,[{UserId,Tweets}|ResultSoFar])
after ?TIMEOUT ->
{error,timeout,List,ResultSoFar}
end.
remove_pending_request(Ref,UserId,List) ->
{value,{Ref,UserId},NewList} = lists:keytake(Ref,1,List),
{ok,NewList}.
pick_random(List) ->
lists:nth(rand:uniform(length(List)), List).
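The example above assumes that server:get_tweets/3 has been split as described: a request function that only sends, plus the collect/2 loop that gathers the answers. A minimal sketch of what that request side could look like, assuming the message protocol of the server module above (request_tweets/2 is hypothetical and not part of the original code; the client process would have to include the reference in its reply, e.g. {Ref, UserId, Tweets}, so that collect/2 can match it):
% Hypothetical asynchronous counterpart of server:get_tweets/3:
% send the request and return immediately with a reference that
% identifies the pending answer.
request_tweets(Pid, UserId) ->
    Ref = make_ref(),
    Pid ! {self(), Ref, get_tweets, UserId, 1},
    Ref.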
This is my other attempt at implementing a parallel benchmark, which does not achieve any speedup.
get_tweets(Sender, UserId, Node) ->
server:get_tweets(Node, UserId, 0),
Sender ! done_get_tweets.
test_get_tweets3() ->
{_ServerId, UserInfos} = initializeForBench_server(),
run_benchmark("parallel get_tweet",
fun () ->
lists:foreach(
fun (_) ->
{UserId,Pid} = pick_random(UserInfos),
spawn(?MODULE, get_tweets, [self(), UserId, Pid])
end,
lists:seq(1, ?NUMBER_OF_REQUESTS)),
lists:foreach(fun (_) -> receive done_get_tweets -> ok end end, lists:seq(1, ?NUMBER_OF_REQUESTS))
end,
?RUNS).
I'm slowly learning the Erlang language using the learnyousomeerlang site, and I'm currently at the "Rage Against The Finite-State Machines" chapter, which builds and describes how trade_fsm.erl works. As part of my learning process I've decided to write an interface for this system, where you can control both trading sides by typing console commands. I think I've done a decent job of writing that; however, for some reason I cannot understand, whenever I try to start trading the clients crash. Here's how it goes:
5> z3:init("a", "b").
true
6> z3:display_pids().
First player pid: {<0.64.0>}
Second player pid: {<0.65.0>}.
done
7> z3:p1_propose_trade().
{a}: asking user <0.65.0> for a trade
{b}: <0.64.0> asked for a trade negotiation
done
8> z3:display_pids().
done
9>
And here's my code:
-module(z3).
-compile(export_all).
-record(state, {player1,
player2,
p1items=[],
p2items=[],
p1state,
p2state,
p1name="Carl",
p2name="FutureJim"}).
init(FirstName, SecondName) ->
{ok, Pid1} = trade_fsm:start_link(FirstName),
{ok, Pid2} = trade_fsm:start_link(SecondName),
S = #state{p1name=FirstName, p2name=SecondName,
player1=Pid1, player2=Pid2,
p1state=idle, p2state=idle},
register(?MODULE, spawn(?MODULE, loop, [S])).
display_pids() ->
?MODULE ! display_pids,
done.
p1_propose_trade() ->
?MODULE ! {wanna_trade, p1},
done.
p2_accept_trade() ->
?MODULE ! {accept_trade, p2},
done.
loop(S=#state{}) ->
receive
display_pids ->
io:format("First player pid: {~p}~nSecond player pid: {~p}.~n", [S#state.player1, S#state.player2]),
loop(S);
{wanna_trade, Player} ->
case Player of
p1 ->
trade_fsm:trade(S#state.player1, S#state.player2);
p2 ->
trade_fsm:trade(S#state.player2, S#state.player1);
_ ->
io:format("[Debug:] Invalid player.~n")
end,
loop(S);
{accept_trade, Player} ->
case Player of
p1 ->
trade_fsm:accept_trade(S#state.player1);
p2 ->
trade_fsm:accept_trade(S#state.player2);
_ ->
io:format("[Debug:] Invalid player.~n")
end,
loop(S);
_ ->
io:format("[Debug:] Received invalid command.~n"),
loop(S)
end.
Can anyone tell me why this code fails and how it should be implemented?
When you call z3:p1_propose_trade(), it sends the message {wanna_trade, p1} to the registered process z3.
The message is handled in the loop function, which calls trade_fsm:trade(S#state.player1, S#state.player2); this translates into gen_fsm:sync_send_event(S#state.player1, {negotiate, S#state.player2}, 30000). That call is synchronous: it waits for a reply from the fsm and times out after 30 seconds if no answer is received.
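For reference, the client-side API of the LYSE trade_fsm looks roughly like this (quoted from memory, so check it against your own copy of the tutorial):
%% Ask the other FSM for a trade session; returns when/if the other side accepts.
trade(OwnPid, OtherPid) ->
    gen_fsm:sync_send_event(OwnPid, {negotiate, OtherPid}, 30000).
%% Accept a trade offer proposed by the other side.
accept_trade(OwnPid) ->
    gen_fsm:sync_send_event(OwnPid, accept_negotiate).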
In the idle state, the message is caught by this clause:
idle({negotiate, OtherPid}, From, S=#state{}) ->
ask_negotiate(OtherPid, self()),
notice(S, "asking user ~p for a trade", [OtherPid]),
Ref = monitor(process, OtherPid),
{next_state, idle_wait, S#state{other=OtherPid, monitor=Ref, from=From}};
No reply value is returned to the caller. In the last line you would need something like
{reply, Reply, idle_wait, S#state{other=OtherPid, monitor=Ref, from=From}};
or an explicit call to gen_fsm:reply/2.
I didn't dig too much in the code, but if you change it to:
idle({negotiate, OtherPid}, From, S=#state{}) ->
Reply = ask_negotiate(OtherPid, self()),
notice(S, "asking user ~p for a trade", [OtherPid]),
Ref = monitor(process, OtherPid),
{reply, Reply, idle_wait, S#state{other=OtherPid, monitor=Ref, from=From}};
it doesn't stop and seems to work properly.
Maybe someone who knows the gen_fsm behaviour well can explain what is going on behind the scenes (why is nothing printed when the timeout expires, and why is the shell ready for a new command while it should be waiting for an answer?):
If you call the function trade(OwnPid, OtherPid) manually, you will see that it doesn't return until the 30-second timeout is reached, and then you get an error message.
When it is called via z3:p1_propose_trade(), the error message is not shown after 30 seconds, but the registered process z3 dies.
[EDIT]
I have checked how the code should work, and in fact it doesn't seem necessary to modify the fsm code. The reply should come from the second process, when the second user accepts the negotiation. So you can't run the test this way (loop is blocked waiting for an answer, so it cannot send the accept_trade). Here is a session that works:
{ok,P1} = trade_fsm:start("a1").
{ok,P2} = trade_fsm:start("a2").
T = fun() -> io:format("~p~n",[trade_fsm:trade(P1,P2)]) end.
A = fun() -> io:format("~p~n",[trade_fsm:accept_trade(P2)]) end.
spawn(T). % use another process so the shell is not blocked
A().
You can change the "wanna_trade" handling to avoid the blocking issue:
{wanna_trade, Player} ->
case Player of
p1 ->
spawn(fun() -> trade_fsm:trade(S#state.player1, S#state.player2) end);
p2 ->
spawn(fun() -> trade_fsm:trade(S#state.player2, S#state.player1) end);
_ ->
io:format("[Debug:] Invalid player.~n")
end,
loop(S);
I'm very new to Erlang and tried to implement a simple module with functions to simulate a database: insert() just puts a key -> value pair in the process dictionary, and retrieve() returns the value for a key. However, I am getting stuck in loop(). What am I doing wrong?
-module(db).
-export([start/0,stop/0,retrieve/1,insert/2]).
start() ->
register(db, spawn(fun() ->
loop()
end)
),
{started}.
insert(Key, Value) ->
rpc({insert, Key, Value}).
retrieve(Key) ->
rpc({retrieve, Key}).
stop() ->
rpc({stop}).
rpc(Request) ->
db ! {self(), Request},
receive
{db, Reply} ->
Reply
end.
loop() ->
receive
{rpc, {insert, Key, Value}} ->
put(Key, Value),
rpc ! {db, done},
loop();
{rpc, {retrieve, Key}} ->
Val = get(Key),
rpc ! {db, Val},
loop();
{rpc, {stop}} ->
exit(db,ok),
rpc ! {db, stopped}
end.
So, after compiling:
I first call db:start().
and then when trying db:insert("A", 1)., it gets stuck.
Thank you
The problem is in the loop/0 function. You're using the rpc atom to pattern match the messages received ({rpc, {insert, Key, Value}}), but, as you can see in the rpc/1 function, you always send messages of the form {self(), Request} to the db process.
The self() function returns a pid of the form <X.Y.Z>, which will never match the atom rpc.
For example, let's say you're trying to insert some data using insert/2 and self() returns the pid <0.36.0>. When rpc/1 sends the message on the line db ! {self(), {insert, Key, Value}}, loop/0 will receive the message {<0.36.0>, {insert, Key, Value}}, which will never match {rpc, {insert, Key, Value}}, because rpc is an atom.
The solution is to change rpc atom to a variable, like this:
loop() ->
receive
{Rpc, {insert, Key, Value}} ->
put(Key, Value),
Rpc ! {db, done},
loop();
{Rpc, {retrieve, Key}} ->
Val = get(Key),
Rpc ! {db, Val},
loop();
{Rpc, {stop}} ->
Rpc ! {db, stopped},
exit(whereis(db),ok)
end.
Erlang variables start with capital letters, that's why I used Rpc, instead of rpc.
P.S.: Actually, you had two other problems:
In the last part of loop/0, where you handle the stop message, you call exit(db, ok) before you actually answer the caller. In that case you'd never receive the {db, stopped} message back from the db process, which would already be dead by then. That's why I've changed the order, putting the exit/2 call after Rpc ! {db, stopped}.
When you call exit/2, you were passing db, which is an atom, as the first argument, but exit/2 expects a pid as its first argument, so the call would raise a badarg error. That's why I've changed it to exit(whereis(db), ok).
Let's walk through this a bit more carefully. What do you mean by "rpc"? "Remote Procedure Call" -- sure. But everything in Erlang is an rpc, so we tend not to use that term. Instead we distinguish between synchronous messages (where the caller blocks, waiting on a response) and asynchronous messages (where the caller just fires off a message and runs off without a care in the world). We tend to use the term "call" for a synchronous message and "cast" for an asynchronous message.
We can write that easily, as a call looks a lot like your rpc above, with the added idiom in Erlang of adding a unique reference value to tag the message and monitoring the process we sent a message to just in case it crashes (so we don't get left hanging, waiting for a response that will never come... which we'll touch on in your code in a bit):
% Synchronous handler
call(Proc, Request) ->
Ref = monitor(process, Proc),
Proc ! {self(), Ref, Request},
receive
{Ref, Res} ->
demonitor(Ref, [flush]),
Res;
{'DOWN', Ref, process, Proc, Reason} ->
{fail, Reason}
after 1000 ->
demonitor(Ref, [flush]),
{fail, timeout}
end.
Cast is a bit easier:
cast(Proc, Message) ->
Proc ! Message,
ok.
The definition of call above means that the process we are sending to will receive a message of the form {SenderPID, Reference, Message}. Note that this is different than {sender, reference, message}, as lower-case values are atoms, meaning they are their own values.
When we receive messages we are matching on the shape and values of the message received. That means if I have
receive
{number, X} ->
do_stuff(X)
end
in my code and the process sitting in that receive gets a message {blah, 25}, it will not match. If it receives another message {number, 26}, then it will match: that receive will call do_stuff/1 and the process will continue on. (These two things -- the difference between atoms and variables, and the way matching in receive works -- are why your code is hanging.) The initial message, {blah, 25}, will still be in the mailbox, though, at the front of the queue, so the next receive has a chance to match on it. This property of mailboxes is immensely useful sometimes.
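A quick way to see this in the shell (a toy illustration, not part of the original answer):
1> self() ! {blah, 25}, self() ! {number, 26}.
{number,26}
2> receive {number, X} -> X end.
26
3> flush().
Shell got {blah,25}
ok
The receive skipped over {blah, 25}, matched {number, 26}, and left the unmatched message in the mailbox for a later receive (or flush/0) to pick up.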
But what does a catch-all look like?
Above you are expecting three kinds of messages:
{insert, Key, Value}
{retrieve, Key}
stop
You dressed them up differently, but that's the business end of what you are trying to do. Running the insert message through the call/2 function I wrote above, it would wind up looking like this: {From, Ref, {insert, Key, Value}}. So if we expect any response from the process's receive loop, we will need to match on that exact form. How do we catch unexpected messages or badly formed ones? At the end of the receive clause we can put a single naked variable to match anything else:
loop(State) ->
receive
{From, Ref, {insert, Key, Value}} ->
NewState = insert(Key, Value, State),
From ! {Ref, ok},
loop(NewState);
{From, Ref, {retrieve, Key}} ->
Value = retrieve(Key, State),
From ! {Ref, {ok, Value}},
loop(State);
{From, Ref, stop} ->
ok = io:format("~tp: ~tp told me to stop!~n", [self(), From]),
From ! {Ref, shutting_down},
exit(normal);
Unexpected ->
ok = io:format("~tp: Received unexpected message: ~tp~n",
[self(), Unexpected]),
loop(State)
end.
You will notice that I am not using the process dictionary. DO NOT USE THE PROCESS DICTIONARY. This isn't what it is for. You'll overwrite something important. Or drop something important. Or... bleh, just don't do it. Use a dict or map or gb_tree or whatever instead, and pass it through as the process' State variable. This will become a very natural thing for you once you start writing OTP code later on.
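To tie this back to the module above: the loop assumes insert/3 and retrieve/2 helpers that take the state explicitly and return a new state or a value. A minimal sketch using a map as the state (the map is my choice here; a dict or gb_tree works just as well), together with a start/0 that spawns the loop with an empty map:
%% State-passing helpers for loop/1: the "database" is an ordinary map
%% threaded through the loop instead of the process dictionary.
start() ->
    register(db, spawn(fun() -> loop(#{}) end)),
    {started}.
insert(Key, Value, State) ->
    maps:put(Key, Value, State).
retrieve(Key, State) ->
    maps:get(Key, State, undefined).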
Toy around with these things a bit and you will soon be happily spamming your processes to death.
I have an Erlang project that makes a lot of concurrent SOAP requests to my application. Currently it's limited by how many nodes are available, but I would like to adjust it so that each node can send more than one message at a time.
I've figured that problem out, but I don't know how to get a response back from the process running the SOAP request.
This is my function that I'm attempting to use to do multiple threads:
batch(Url, Message, BatchSize) ->
inets:start(),
Threads = for(1, BatchSize, fun() -> spawn(fun() -> attack_thread() end) end),
lists:map(fun(Pid) -> Pid ! {Url, Message, self()} end, Threads).
This function is called by whoever initiates the stress test, and it runs on every node in our network. It is called repeatedly until the requested number of SOAP requests have been sent and timed.
This is the attack_thread/0 that receives the message sent by the batch function:
attack_thread() ->
receive
{Url, Message, FromPID} ->
{TimeTaken, {ok, {{_, 200, _}, _, _}}} = timer:tc(httpc, request, [post, {Url, [{"connection", "close"}, {"charset", "utf-8"}], "text/xml", Message}, [], []]),
TimeTaken/1000/1000
end.
As you can see, I want it to return the number of seconds the SOAP request took. However, Erlang's message passing (Pid ! Message) doesn't return anything useful.
How can I get a result back?
Each of your attack_thread() threads can simply drop a message in the mailbox of the process operating the batch/3 function:
FromPid ! {time_taken, self(), TimeTaken / 1000 / 1000}.
but then you need to collect the results:
batch(Url, Message, BatchSize) ->
inets:start(),
Pids = [spawn_link(fun attack_thread/0) || _ <- lists:seq(1, BatchSize)],
[Pid ! {Url, Message, self()} || Pid <- Pids],
collect(Pids).
collect([]) -> [];
collect(Pids) ->
receive
{time_taken, P, Time} ->
[Time | collect(Pids -- [P])]
end.
Some other comments: you probably want spawn_link/1 here. If something dies along the way, you want the whole thing to die. Also, be sure to tune inets httpc a bit so it is more effective. You might also want to look at basho_bench or tsung.
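For example, the default httpc profile only keeps a couple of persistent connections per host, so one plausible tuning step is to raise those limits before the run (the numbers below are placeholders, not recommendations):
%% inets is already started by batch/3; adjust the default httpc profile.
ok = httpc:set_options([{max_sessions, 50},
                        {max_keep_alive_length, 100}]).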
Finally, you can use a closure directly rather than pass the url and message:
attack_thread(Url, Message, From) -> ...
So your spawn is:
Self = self(),
Pids = [spawn_link(fun() -> attack_thread(Url, Message, Self) end) || _ <- ...]
This avoids having to pass the URL and message in an initial message.
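A possible body for that closure-based worker, following the same timing logic as the original attack_thread/0 and sending back the {time_taken, Pid, Seconds} tuple that collect/1 expects (sketch only):
%% Closure-based worker: the URL and message are captured when the fun
%% is created, so no initial message is needed.
attack_thread(Url, Message, From) ->
    Headers = [{"connection", "close"}, {"charset", "utf-8"}],
    Request = {Url, Headers, "text/xml", Message},
    {TimeTaken, {ok, {{_, 200, _}, _, _}}} =
        timer:tc(httpc, request, [post, Request, [], []]),
    From ! {time_taken, self(), TimeTaken / 1000 / 1000}.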
I want to spawn a number of processes that will respond to messages they receive. This is simple. However, I also want to have a process that is able to block the output of another process.
In another language I might set a flag and check the status of that flag before sending a message. But since Erlang doesn't have mutable variables, how might I achieve that?
I can certainly add a pattern in a receive to watch for a suppression message. I just don't know what to do with it next.
I don't really like the idea of using an ETS table just for this, as that breaks a nice distributed model. Equally I'm not too concerned about concurrency problems, but I'd like to design this in the most appropriate fashion.
Each echo server can have its own state which indicates whether it is currently muted. Other processes can toggle that state with mute/unmute messages. Before responding to a message, the echo server will check the state and act appropriately.
For example:
1> {ok, Pid} = echo:start_link().
{ok,<0.99.0>}
2> echo:echo(Pid, "this message will be echoed.").
#Ref<0.0.0.443>
3> echo:echo(Pid, "as will this message..").
#Ref<0.0.0.447>
4> echo:mute(Pid).
ok
5> echo:echo(Pid, "this message will not.").
#Ref<0.0.0.457>
6> echo:unmute(Pid).
ok
7> echo:echo(Pid, "but this one will..").
#Ref<0.0.0.461>
8> flush().
Shell got {#Ref<0.0.0.443>,"this message will be echoed."}
Shell got {#Ref<0.0.0.447>,"as will this message.."}
Shell got {#Ref<0.0.0.461>,"but this one will.."}
ok
9> echo:stop(Pid).
ok
code:
-module(echo).
-behaviour(gen_server).
%% API
-export([start_link/0,
echo/2,
mute/1,
unmute/1,
stop/1]).
%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
terminate/2, code_change/3]).
-define(SERVER, ?MODULE).
-record(state, {mute=false}).
%%%===================================================================
%%% API
%%%===================================================================
start_link() ->
gen_server:start_link(?MODULE, [], []).
echo(Pid, Msg) ->
Ref = make_ref(),
gen_server:cast(Pid, {echo, self(), Ref, Msg}),
Ref.
mute(Pid) ->
gen_server:cast(Pid, mute).
unmute(Pid) ->
gen_server:cast(Pid, unmute).
stop(Pid) ->
gen_server:cast(Pid, stop).
%%%===================================================================
%%% gen_server callbacks
%%%===================================================================
init([]) ->
{ok, #state{}}.
handle_call(_Request, _From, State) ->
Reply = ok,
{reply, Reply, State}.
handle_cast({echo, From, Tag, Msg}, #state{mute=false} = State) ->
From ! {Tag, Msg},
{noreply, State};
handle_cast({echo, _From, _Tag, _Msg}, #state{mute=true} = State) ->
{noreply, State};
handle_cast(mute, State) ->
{noreply, State#state{mute=true}};
handle_cast(unmute, State) ->
{noreply, State#state{mute=false}};
handle_cast(stop, State) ->
{stop, normal, State};
handle_cast(_Msg, State) ->
{noreply, State}.
handle_info(_Info, State) ->
{noreply, State}.
terminate(_Reason, _State) ->
ok.
code_change(_OldVsn, State, _Extra) ->
{ok, State}.