I am implementing a Twitter-like application in Erlang. I have both its distributed and non-distributed implementations. I am running a benchmark, but I cannot find a way to send parallel requests to each user process in the distributed implementation. I am using lists:foreach to send "get tweets" requests to a list of client processes. My understanding is that lists:foreach visits the elements of the list one at a time, which results in sequential behavior and ultimately makes my distributed implementation take as long as the non-distributed one. Is it possible to send the "get tweets" requests to different client processes all at once? This seems like a rather specific case to me, and it has been difficult to find a solution inside and outside StackOverflow.
%% Sequential baseline benchmark: performs 10000 get_tweets requests one after
%% another, each addressed to a randomly chosen {UserId, ClientPid} entry of
%% UserInfos, and passes that workload to run_benchmark/3 under the label
%% "timeline". run_benchmark/3 is defined elsewhere; its last argument (30) is
%% presumably the number of runs -- confirm against its definition.
test_get_tweets_Bench() ->
    {_ServerPid, UserInfos} = initializeForBench_server(),
    RequestOnce =
        fun(_) ->
            Choice = pick_random(UserInfos),
            UserId = element(1, Choice),
            ClientPid = element(2, Choice),
            server:get_tweets(ClientPid, UserId, 1)
        end,
    Workload =
        fun() ->
            lists:foreach(RequestOnce, lists:seq(1, 10000))
        end,
    run_benchmark("timeline", Workload, 30).
%% Return one element of List chosen uniformly at random.
%% Fails on an empty list, because rand:uniform/1 needs a positive bound.
pick_random(List) ->
    Index = rand:uniform(length(List)),
    lists:nth(Index, List).
UserInfos is a list of the following form: [{userId,client_process},...]
After trying rpc:pmap instead of the lists:foreach, my benchmark has become approximately 3 times slower. The changes are as follows:
%% Benchmark variant that distributes the 10000 requests through rpc:pmap/3.
%% For every element of the sequence, rpc:pmap calls
%% ?MODULE:do_apply(Element, RequestOnce), so do_apply/2 has to be exported.
%% Each request targets a randomly chosen {UserId, ClientPid} entry of
%% UserInfos. The workload is handed to run_benchmark/3 (defined elsewhere)
%% under the label "get_tweets 2".
test_get_tweets_Bench2() ->
    {_ServerPid, UserInfos} = initializeForBench_server(),
    RequestOnce =
        fun(_) ->
            Choice = pick_random(UserInfos),
            UserId = element(1, Choice),
            ClientPid = element(2, Choice),
            server:get_tweets(ClientPid, UserId, 1)
        end,
    Workload =
        fun() ->
            rpc:pmap({?MODULE, do_apply}, [RequestOnce], lists:seq(1, 10000))
        end,
    run_benchmark("get_tweets 2", Workload, 30).
%% Pick a uniformly random element of List.
%% An empty List makes rand:uniform/1 fail, since its bound must be >= 1.
pick_random(List) ->
    Size = length(List),
    lists:nth(rand:uniform(Size), List).
%% Apply the one-argument fun to the given argument and return its result.
%% The argument order (element first, fun second) is the order in which
%% rpc:pmap/3 supplies them: list element, then the extra arguments.
do_apply(Arg, Fun) ->
    Fun(Arg).
I thought rpc:pmap would make my benchmark faster as it would send the get_tweet requests in parallel.
Below is my server module which is the API between my benchmark and my Twitter-like application. The API sends the requests from my benchmark to my Twitter-like application.
%% This module provides the protocol that is used to interact with an
%% implementation of a microblogging service.
%%
%% The interface is designed to be synchronous: it waits for the reply of the
%% system.
%%
%% This module defines the public API that is supposed to be used for
%% experiments. The semantics of the API here should remain unchanged.
-module(server).
-export([register_user/1,
subscribe/3,
get_timeline/3,
get_tweets/3,
tweet/3]).
%%
%% Server API
%%
% Register a new user with the server process ServerPid.
% Sends {self(), register_user} and blocks until a
% {ResponsePid, registered_user, UserId} message arrives. Returns the new
% user's id together with the pid that answered; that pid should be used for
% subsequent requests by this client.
-spec register_user(pid()) -> {integer(), pid()}.
register_user(ServerPid) ->
    Request = {self(), register_user},
    ServerPid ! Request,
    receive
        {ClientPid, registered_user, NewUserId} ->
            {NewUserId, ClientPid}
    end.
% Make user UserId subscribe to (follow) user UserIdToSubscribeTo.
% Sends the request to ServerPid and blocks until the matching
% `subscribed` confirmation for exactly this pair of ids is received.
-spec subscribe(pid(), integer(), integer()) -> ok.
subscribe(ServerPid, UserId, UserIdToSubscribeTo) ->
    Request = {self(), subscribe, UserId, UserIdToSubscribeTo},
    ServerPid ! Request,
    receive
        % UserId and UserIdToSubscribeTo are already bound, so only the
        % confirmation for this very request is accepted.
        {_From, subscribed, UserId, UserIdToSubscribeTo} ->
            ok
    end.
% Fetch one page of the timeline of user UserId.
% Results may be paginated to limit the amount of data in a single response;
% how that is done is up to the server.
% Blocks until the `timeline` reply for this UserId and Page arrives and
% returns the list it carries.
-spec get_timeline(pid(), integer(), integer()) -> [{tweet, integer(), erlang:timestamp(), string()}].
get_timeline(ServerPid, UserId, Page) ->
    Request = {self(), get_timeline, UserId, Page},
    ServerPid ! Request,
    receive
        % UserId and Page are bound: only the reply to this request matches.
        {_From, timeline, UserId, Page, Entries} ->
            Entries
    end.
% Fetch one page of the tweets written by user UserId.
% Results may be paginated to limit the amount of data in a single response;
% how that is done is up to the server.
% Blocks until the `tweets` reply for this UserId and Page arrives and
% returns the list it carries.
-spec get_tweets(pid(), integer(), integer()) -> [{tweet, integer(), erlang:timestamp(), string()}].
get_tweets(ServerPid, UserId, Page) ->
    Request = {self(), get_tweets, UserId, Page},
    ServerPid ! Request,
    receive
        % UserId and Page are bound: only the reply to this request matches.
        {_From, tweets, UserId, Page, Entries} ->
            Entries
    end.
% Publish the text Tweet on behalf of user UserId.
% (Authorization/security are not regarded in any way.)
% Blocks until the `tweet_accepted` reply for this UserId arrives and returns
% the timestamp it carries.
-spec tweet(pid(), integer(), string()) -> erlang:timestamp().
tweet(ServerPid, UserId, Tweet) ->
    Request = {self(), tweet, UserId, Tweet},
    ServerPid ! Request,
    receive
        % UserId is bound: only an acceptance for this user matches.
        {_From, tweet_accepted, UserId, AcceptedAt} ->
            AcceptedAt
    end.
In Erlang, a message is exchanged from a process A to a process B. There is no feature available like a broadcast or a selective broadcast. In your application I see 3 steps:
send a request to get the tweets from the users,
the user process prepare the answer and send it back to the requester
the initial process collects the answers
Sending the requests to the user processes and collecting the tweets (steps 1 and 3) cannot use parallelism. Of course you can use multiple processes to send the requests and collect the answers, up to 1 per user, but I guess that it is not the subject of your question.
What is feasible is to ensure that the 3 steps are done not in sequence for each user process, but in parallel. I guess that the function server:get_tweets is responsible for sending the request and collecting the answer. If I am correct (I cannot know, since you don't provide the code and you ignore the returned values), you can use parallelism by splitting this function in two: the first part sends the requests, the second collects the answers. (Here is an example of code; I have not tried or even compiled it, so consider it with care :o)
%% Benchmark for the split request/collect scheme: first fire all 10000
%% requests without waiting for any reply, then gather the replies with
%% collect/2.
%% server:request_tweets/2 is assumed to send the request and return the
%% reference that the corresponding reply will carry; it is not among the
%% functions exported by the server module and has to be provided.
%% run_benchmark/3 is defined elsewhere; its last argument (30) is presumably
%% the number of runs -- confirm against its definition.
test_get_tweets_Bench() ->
    {_ServerPid, UserInfos} = initializeForBench_server(),
    run_benchmark("timeline",
                  fun () ->
                      % send the requests
                      Pending = lists:map(fun (_) ->
                                              {UserId, Pid} = pick_random(UserInfos),
                                              Ref = server:request_tweets(Pid, UserId),
                                              {Ref, UserId}
                                          end,
                                          lists:seq(1, 10000)),
                      % collect the answers for exactly the requests sent above
                      collect(Pending, [])
                  end,
                  30).
%% Gather the replies for every pending {Ref, UserId} request in the list.
%% Returns {ok, Results} once nothing is pending any more, where Results is a
%% list of {UserId, Tweets} in reverse order of arrival.
%% Returns {error, timeout, StillPending, ResultsSoFar} if no reply arrives
%% within ?TIMEOUT milliseconds; ?TIMEOUT must be defined by the enclosing
%% module.
%% A reply that does not correspond to a pending request makes
%% remove_pending_request/3 fail with badmatch.
collect([], Result) -> {ok, Result};
collect(List, ResultSoFar) ->
    receive
        {Ref, UserId, Tweets} ->
            {ok, NewList} = remove_pending_request(Ref, UserId, List),
            collect(NewList, [{UserId, Tweets} | ResultSoFar])
    after ?TIMEOUT ->
        {error, timeout, List, ResultSoFar}
    end.
%% Remove the pending entry {Ref, UserId} from List and return
%% {ok, RemainingList}. Fails with badmatch when Ref is not pending or is
%% registered for a different UserId.
remove_pending_request(Ref, UserId, List) ->
    Taken = lists:keytake(Ref, 1, List),
    {value, {Ref, UserId}, Remaining} = Taken,
    {ok, Remaining}.
%% Select an element of List at a uniformly random position.
%% Fails for an empty list, because rand:uniform/1 requires a bound >= 1.
pick_random(List) ->
    Position = rand:uniform(length(List)),
    lists:nth(Position, List).
This is my other attempt at implementing a parallel benchmark which does not achieve any speed up.
%% Worker body for the parallel benchmark: request page 0 of UserId's tweets
%% through the process Node, discard the result, then notify Sender with the
%% atom done_get_tweets.
get_tweets(Sender, UserId, Node) ->
    _Tweets = server:get_tweets(Node, UserId, 0),
    Sender ! done_get_tweets.
%% Parallel benchmark: starts one worker process per request
%% (?NUMBER_OF_REQUESTS in total), each running get_tweets/3 for a randomly
%% chosen {UserId, Pid} entry, then waits until every worker has reported
%% done_get_tweets. The workload is run by run_benchmark/3 with ?RUNS as its
%% last argument; both macros and run_benchmark/3 are defined elsewhere.
%% get_tweets/3 must be exported because the workers are started by
%% module/function/arguments.
test_get_tweets3() ->
    {_ServerId, UserInfos} = initializeForBench_server(),
    run_benchmark("parallel get_tweet",
                  fun () ->
                      Parent = self(),
                      Requests = lists:seq(1, ?NUMBER_OF_REQUESTS),
                      lists:foreach(
                        fun (_) ->
                            {UserId, Pid} = pick_random(UserInfos),
                            %% Linked on purpose: if a worker crashes, the
                            %% benchmark process fails too instead of waiting
                            %% forever for a done_get_tweets that never comes.
                            spawn_link(?MODULE, get_tweets, [Parent, UserId, Pid])
                        end,
                        Requests),
                      %% One acknowledgement is expected per started worker.
                      lists:foreach(fun (_) -> receive done_get_tweets -> ok end end,
                                    Requests)
                  end,
                  ?RUNS).
In finch, we can define router, request parameters, request body like this.
// Payload type produced by the three endpoints below.
case class Test(name: String, age: Int)
// Endpoint for POST requests on the "hello" path; always answers Ok with a fixed Test value.
val router: Endpoint[Test] = post("hello") { Ok(Test("name", 30)) }
// Endpoint that yields a Test from the request body.
val requestBody: Endpoint[Test] = body.as[Test]
// Endpoint that yields a Test from request parameters; it does not mention any path.
val requestParameters: Endpoint[Test] = Endpoint.derive[Test].fromParams
The benefit is that we can compose EndPoint together. For example, I can define:
The request path is hello and Parameter should have name and age. (router :: requestParameters)
However, I can still successfully run an invalid endpoint which doesn't include any request path (there is actually no compilation error):
// Serves requestParameters on its own on port 3000, without composing it with a path-matching endpoint.
Await.ready(Http.serve(":3000", requestParameters.toService))
The result is a 404 Not Found page, even though I expected the error to be reported earlier, e.g. as a compilation error. Is this a design drawback, or is it something Finch is trying to fix?
Many thanks in advance
First of all, thanks a lot for asking this!
Let me give you some insight on how Finch's endpoints work. If you speak category theory, an Endpoint is an Applicative embedding StateT represented as something close to Input => Option[(Input, A)].
Simply speaking, an endpoint takes an Input that wraps an HTTP request and also captures the current path (eg: /foo/bar/baz). When an endpoint is applied to a given request, it either matches it (returning Some) or falls over (returning None). When matched, it changes the state of the Input, usually removing the first path segment from it (eg: removing foo from /foo/bar/baz) so the next endpoint in the chain can work with a new Input (and new path).
Once an endpoint is matched, Finch checks if there is something else left in the Input that wasn't matched. If something is left, the match is considered unsuccessful and your service returns 404.
scala> val e = "foo" :: "bar"
e: io.finch.Endpoint[shapeless.HNil] = foo/bar
scala> e(Input(Request("/foo/bar/baz"))).get._1.path
res1: Seq[String] = List(baz)
When it comes to endpoints matching/extracting query-string params, no path segments are being touched there and the state is passed to the next endpoint unchanged. So when an endpoint param("foo") is applied, the path is not affected. That simply means the only way to serve a query-string endpoint (note: an endpoint that only extracts query-string params) is to send it a request with the empty path /.
scala> val s = param("foo").toService
s: com.twitter.finagle.Service[com.twitter.finagle.http.Request,com.twitter.finagle.http.Response] = <function1>
scala> s(Request("/", "foo" -> "bar")).get
res4: com.twitter.finagle.http.Response = Response("HTTP/1.1 Status(200)")
scala> s(Request("/bar", "foo" -> "bar")).get
res5: com.twitter.finagle.http.Response = Response("HTTP/1.1 Status(404)")
I'm trying to test Nancy modules with F# as described here, the thing is I can't see how to pass the second parameter in F#.
Here's what I have so far:
// Test sketch: issues GET /Menu through Nancy's testing Browser, asserts that
// the response status is OK, and returns the response.
let should_return_status_ok_for_get() =
let bootstrapper = new DefaultNancyBootstrapper()
// The second constructor argument is a callback on the request context; here it
// calls Accept with the "application/json" media range.
let browser = new Browser(bootstrapper, fun req -> req.Accept(new Responses.Negotiation.MediaRange("application/json")))
// The second argument to Get is likewise a per-request callback.
let result = browser.Get("/Menu", fun req -> req.HttpRequest())
Assert.AreEqual (HttpStatusCode.OK, result.StatusCode)
result
in the example, I should be able to instantiate a Browser object to test a specific Module:
// C# form from the referenced example: the Browser is configured with a single module.
var browser = new Browser(with => with.Module(new MySimpleModule()));
But I get a compile time error in F# when I try:
// F# translation attempt; the compiler rejects it with "No overloads match for method 'Browser'".
let browser = new Browser(fun req -> req.Module(new MenuModule()))
EDIT Error: No overloads match for method 'Browser'
Are there any examples of this in F#?
Also, is this the best way to go about this in F#?
This is how I run Nancy tests in F#:
I create a new bootstrapper in my test project by deriving from the DefaultNancyBootstrapper. I use this bootstrapper to register my mocks:
/// Test bootstrapper: Nancy's default bootstrapper extended with a container
/// registration that maps IMyClass to MyMockClass.
type Bootstrapper() =
    inherit DefaultNancyBootstrapper()

    override self.ConfigureApplicationContainer(container : TinyIoCContainer) =
        // Keep the default registrations, then add the mock on top of them.
        base.ConfigureApplicationContainer(container)
        // Register returns a value that is not needed here.
        ignore (container.Register<IMyClass, MyMockClass>())
Then I write a simple test method to execute a GET request like so:
/// Tests for the /healthcheck endpoint, run against the mock-registering
/// Bootstrapper through Nancy's testing Browser.
[<TestFixture>]
type ``Health Check Tests`` () =

    [<Test>]
    member test.``Given the service is healthy the health check endpoint returns a HTTP 200 response with status message "Everything is OK"`` () =
        let browser = new Browser(new Bootstrapper())
        let response = browser.Get("/healthcheck")
        // Decode the JSON body into the typed health-check response.
        let payload =
            response.Body.AsString()
            |> JsonSerializer.deserialize<HealthCheckResponse>
        response.StatusCode |> should equal HttpStatusCode.OK
        payload.Message |> should equal "Everything is OK"
Let me know if this helps!
I want to create a service that generates its HTML according to a given parameter and a map. Given the parameter, the service looks up in the map the HTML and a function to launch on the client side.
(* One entry of the samples map: a short text, the HTML page to serve, and the
   demonstration function associated with it. *)
type sample =
(string (* little text *)*
Html5_types.html Eliom_content.Html5.elt (* html page *) *
(unit -> unit)(* Demonstration function *))
Given that the function is to be launched on client side, I insert it in the map as a client value :
{client{
  (* Demonstration function: passes an HTML paragraph to Ojquery.add_html for
     the elements selected by jQ "li" and discards the result.
     It takes a unit argument so that the jQuery call runs only when the
     function is invoked, not when the client code is loaded, and so that it
     has the type unit -> unit under which it is stored and later called. *)
  let demo_function () =
    ignore (Ojquery.add_html
              (Ojquery.jQ "li") "<p id='test1'>new paragraph</p>")
}}
(* Build the samples map: start from Samples.empty and add one entry under the
   key "add_html", made of a text, an HTML page built with Eliom_tools.F.html
   (its arguments are elided in this excerpt) and the client value wrapping
   demo_function. *)
let get_samples () =
let samples_map = Samples.empty in
let samples_map = Samples.add "add_html"
("text",
(Eliom_tools.F.html
(** html stuff **)
),
{unit->unit{demo_function}}) samples_map in
samples_map
And then I register the service like this :
(* Service on the path ["examples"] taking a single GET parameter: a string
   named "entry". *)
let sample_service =
  let entry_param = Eliom_parameter.string "entry" in
  Eliom_service.service
    ~path:["examples"]
    ~get_params:entry_param
    ()
(* Register the handler of sample_service: look the requested entry up in
   samples_map, create a client value that calls the entry's demonstration
   function, and answer with the entry's HTML page. An unknown entry is
   answered with not_found. samples_map and not_found are defined elsewhere. *)
let () =
  Examples_app.register
    ~service:sample_service
    (fun entry () ->
      try
        let (_name, html, func) = Samples.find entry samples_map in
        ignore {unit{%func ()}};
        Lwt.return html
      with Not_found -> Lwt.return not_found)
The rest of the code is pretty much only the result of a classic eliom-distillery, with the inclusion of the ojquery package for the client function used.
The compilation phase goes smoothly, but when I try to launch the server, I get the following error message :
ocsigenserver: main: Fatal - Error in configuration file: Error while parsing configuration file: Eliom: while loading local/lib/examples/examples.cma: Failure("That function cannot be called here because it needs information about the request or the site.")
My first guess was that it is due to the fact that I store client values outside of a service, but is there any way to store this kind of values on the server?
I tried to wrap them in regular functions :
(* Attempted workaround: a server-side function that builds the client value
   calling demo_client_func only when it is invoked. *)
let demo_serv_func () = {unit{demo_client_func ()}}
But the problem remained...
I found the issue. The problem was not because I stored client functions, but because I used Eliom_tools.F.html outside of a service.
It happens that Eliom_tools needs the context of the service to function, and since I was storing it outside of the service, it could not work.
I solved the issue by using Eliom_tools inside the service, and storing the body of the HTML page in the map.