How to take sublist without first and last item with F#? - list

I have sorted list of integer values:
let ls = [1..4]
How can I get a sublist without first and the last element? (In the most optimal way)
The expected result is [2; 3].
This is what I have so far, and yeah, it's working, but I in my opinion it's just not the best approach.
[1..4] |> List.tail |> List.rev |> List.tail |> List.sort

A somewhat long answer incoming in response to your innocently worded qualifier: "In the most optimal way"
Optimal in terms of what?
Performance? (Most likely)
Performance but also include GC performance?
Memory usage?
x86?
x64?
And so on...
So I decided to measure some aspects of the problem.
I measured the different answers (added a non-idiomatic version as well) in this thread in various different context.
Without further ado here is the program I used to measure
open System
open System.Diagnostics
open System.IO
module so29100251 =
// Daystate solution (OP)
module Daystate =
// Applied minor fixes to it
let trim = function
| [] | [_] | [_;_] -> []
| ls -> ls |> List.tail |> List.rev |> List.tail |> List.rev
// kaefer solution
module kaefer =
type 'a State = Zero | One | Other of 'a
let skipFirstAndLast xss =
let rec aux acc = function
| _, [] -> List.rev acc
| Zero, x::xs -> aux acc (One, xs)
| One, x::xs -> aux acc (Other x, xs)
| (Other prev), x::xs -> aux (prev :: acc) (Other x, xs)
aux [] (Zero, xss)
// Petr solution
module Petr =
let rec trimImpl ls acc =
match ls, acc with
| [], _ -> acc
| h::[], acc -> List.rev acc
| h::n::t, [] -> trimImpl t [n]
| h::t, acc -> trimImpl t (h::acc)
let trim ls = trimImpl ls []
// NonIdiomatic solution
module NonIdiomatic =
let trim (hint : int) (ls : 'T list) =
// trims last of rest
// Can't ask for ls.Length as that is O(n)
let ra = ResizeArray<_> (hint)
// Can't use for x in list do as it relies on .GetEnumerator ()
let mutable c = ls
while not c.IsEmpty do
ra.Add c.Head
c <- c.Tail
let count = ra.Count
let mutable result = []
for i in (count - 2)..(-1)..1 do
result <- ra.[i]::result
result
open so29100251
type Time = MilliSeconds of int64
type TestKind<'T> =
| Functional of 'T
| MeasurePerformance of int*int
[<EntryPoint>]
let main argv =
let factor = 10000000
// let maxHint = Int32.MaxValue
let maxHint = 100
let time (action : unit -> 'T) : 'T*Time =
let sw = Stopwatch ()
sw.Start ()
let r = action ()
sw.Stop ()
r, MilliSeconds sw.ElapsedMilliseconds
let adapt fn hint ls = fn ls
let trimmers =
[|
"Daystate" , adapt Daystate.trim
"kaefer" , adapt kaefer.skipFirstAndLast
"Petr" , adapt Petr.trim
"NonIdiomatic" , NonIdiomatic.trim
|]
#if DEBUG
let functionalTestCases =
[|
Functional [] , "empty" , []
Functional [] , "singleton" , [1]
Functional [] , "duoton" , [1;2]
Functional [2] , "triplet" , [1;2;3]
Functional [2;3] , "quartet" , [1;2;3;4]
|]
let performanceMeasurements = [||]
#else
let functionalTestCases = [||]
let performanceMeasurements =
[|
"small" , 10
"big" , 1000
"bigger" , 100000
// "huge" , 10000000
|] |> Array.map (fun (name, size) -> MeasurePerformance (size, (factor / size)) , name , [for x in 1..size -> x])
#endif
let testCases =
[|
functionalTestCases
performanceMeasurements
|] |> Array.concat
use tsv = File.CreateText ("result.tsv")
tsv.WriteLine (sprintf "TRIMMER\tTESTCASE\tSIZE\tHINT\tRUNS\tMEMORY_BEFORE\tMEMORY_AFTER\tGC_TIME\tRUN_TIME")
for trimName, trim in trimmers do
for testKind, testCaseName, testCase in testCases do
match testKind with
| Functional expected ->
let actual = trim 0 testCase
if actual = expected then
printfn "SUCCESS: Functional test of %s trim on testcase %s successful" trimName testCaseName
else
printfn "FAILURE: Functional test of %s trim on testcase %s failed" trimName testCaseName
| MeasurePerformance (size,testRuns) ->
let hint = min size maxHint
let before = GC.GetTotalMemory(true)
printfn "MEASURE: Running performance measurement on %s trim using testcase %s..." trimName testCaseName
let timeMe () =
for x in 1..testRuns do
ignore <| trim hint testCase
let _, MilliSeconds ms = time timeMe
let after = GC.GetTotalMemory(false)
let timeGC () =
ignore <| GC.GetTotalMemory(true)
let _, MilliSeconds msGC = time timeMe
printfn "...%d ms (%d runs), %d (before) %d (after) %d ms (GC)" ms testRuns before after msGC
tsv.WriteLine (sprintf "%s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d" trimName testCaseName size hint testRuns before after msGC ms)
0
I then measured the execution time and GC time on x64 and max size hint allowed:
(size hints is only used by the non-idiomatic version)
x86 and max size hint allowed:
x64 and max 100 hint allowed:
x86 and max 100 hint allowed:
Looking at the performance charts we can note some somewhat surprising things:
All variants are iterating 10000000 times. One would expect the execution time to not differ between the different variants but they do.
The crusty old x86 scores consistently better overall. I won't speculate why.
OPs initial version while seemingly wasteful scores pretty good. It's probably helped by that List.rev is very optimized (IIRC it does some safe cheating available only to F# devs)
The kaefer version while on paper a better solution seems to score the worst. I think it's because it allocates extra State objects which are heap based. (This should obviously not be interpreted as a criticism of kaefers skills)
The non-idiomatic solution scores good with good size hints but not as good as I expected. It might be that building the final lists is what costs most cycles. It might also be that tail recursive functions over lists are more efficient than while loops as IIRC pattern matching are more effective than calling List.Tail/List.Head/List.IsEmpty
GC time is almost as big as the execution time.
I expected the GC time of the non-idiomatic solution to be significantly lower than the rest. However, while the ResizeArray<_> are probably quick to collect the list objects aren't.
On x86 arch the performance difference between Petr solution and the non-idiomatic one might not warrant the extra complexity.
Some final thoughts:
OPs original solution did pretty good
Garbage Collection takes time
Always measure...
Hopefully it was somewhat interesting
Edit:
The GC performance measurement numbers should not be over-interpreted into some thing more than: "GC can be expensive"
I later changed from a while loop to tail-recursion over a list which did improve the performance somewhat but not enough to warrant an update of the charts.

This is one of the ways:
let rec trim ls acc =
match ls, acc with
| [], _ -> acc
| h::[], acc -> List.rev acc
| h::n::t, [] -> trim t [n]
| h::t, acc -> trim t (h::acc)
let reslt = trim ls []

You didn't require standard library functions to achieve this, your're just asking for an efficient way. Defining a recursive function with an accumulator which holds the intermediate results would then appear a viable solution, even when the list has to be reversed at its termination.
I'm providing a custom Discriminated Union to keep track of the state, this is modelled along the lines of the Option type with an extra case.
type 'a State = Zero | One | Other of 'a
let skipFirstAndLast xss =
let rec aux acc = function
| _, [] -> List.rev acc
| Zero, x::xs -> aux acc (One, xs)
| One, x::xs -> aux acc (Other x, xs)
| (Other prev), x::xs -> aux (prev :: acc) (Other x, xs)
aux [] (Zero, xss)
[1..4] |> skipFirstAndLast // val it : int list = [2; 3]

Related

Split a list in two and preserve order

How do you efficiently split a list in 2, preserving the order of the elements?
Here's an example of input and expected output
[] should produce ([],[])
[1;] can produce ([1;], []) or ([], [1;])
[1;2;3;4;] should produce ([1; 2;], [3; 4;])
[1;2;3;4;5;] can produce ([1;2;3;], [4;5;]) or ([1;2;], [3;4;5;])
I tried a few things but I'm unsure which is the most efficient... Maybe there is a solution out there that I'm missing completely(calls to C code don't count).
My first attempt was to use List's partition function with a ref to 1/2 the length of the list. This works but you walk through the whole list when you only need to cover half.
let split_list2 l =
let len = ref ((List.length l) / 2) in
List.partition (fun _ -> if !len = 0 then false else (len := !len - 1; true)) l
My next attempt was to use a accumulator and then reverse it. This only walks through half the list but I call reverse to correct the order of the accumulator.
let split_list4 l =
let len = List.length l in
let rec split_list4_aux ln acc lst =
if ln < 1
then
(List.rev acc, lst)
else
match lst with
| [] -> failwith "Invalid split"
| hd::tl ->
split_list4_aux (ln - 1) (hd::acc) tl in
split_list4_aux (len / 2) [] l
My final attempt used function closures for the accumulator and it works but I have no idea how efficient closures are.
let split_list3 l =
let len = List.length l in
let rec split_list3_aux ln func lst =
if ln < 1
then
(func [], lst)
else
match lst with
| hd::tl -> split_list3_aux (ln - 1) (fun t -> func (hd::t)) tl
| _ -> failwith "Invalid split" in
split_list3_aux (len / 2) (fun t -> t) l
So is there a standard way to split a list in OCaml(preserving element order) that's most efficient?
You need to traverse the whole list for all of your solutions. The List.length function traverses the whole list. But it's true that your later solutions re-use the tail of the original list rather than constructing a new list.
It is difficult to say how fast any given bit of code is going to be just by inspection. Generally it's good enough to think in aysmptotic O(f(n)) terms, then work on slow functions in detail through timing tests (of realistic data).
All of your answers look to be O(n), which is the best you can do since you clearly need to know the length of the list to get the answer.
Your split_list2 and split_list3 solutions look pretty complicated to me, so I would expect (intuitively) them to be slower. A closure is a fairly complicated data structure containing a function and the environment of accessible variables. So it's problaby not all that fast to construct one.
Your split_list4 solution is what I would code up myself.
If you really care about timings you should time your solutions on some long lists. Keep in mind that you might get different timings on different systems.
Couldn't give up this question. I had to find a way that I could walk through this list one time to create a split with order preserved..
How about this?
let split lst =
let cnt = ref 0 in
let acc = ref ([], []) in
let rec split_aux c l =
match l with
| [] -> cnt := (c / 2)
| hd::tl ->
(
split_aux (c + 1) tl;
let (f, s) = (!acc) in
if c < (!cnt)
then
acc := ((hd::f), s)
else
acc := (f, hd::s)
)
in
split_aux 0 lst; !acc

Applying Fold function in F#

let list_min_fold = List.fold (fun acc -> List.min acc ) 0 lst
printfn"Using regular List.fold function:\n The minimum is: %A\n"
(list_min_fold)
When I execute my code this error displays:
error FS0001: The type '('a -> 'b)' does not support the 'comparison' constraint. For example, it does not support the 'System.IComparable' interface
Why? Please help :(
Are you trying to find the smallest number in a list? If so, you need to use the min function (which takes just two arguments) rather than List.min (which takes a list of arguments):
To keep the code the most similar to your example, you can write (note also that starting with 0 is not going to work, so I used System.Int32.MaxValue instead):
let lst = [4;3;1;2;5;]
let list_min_fold = List.fold (fun acc -> min acc) System.Int32.MaxValue lst
It is also worth noting that the function you pass to fold takes two arguments - the state acc and the current value:
let list_min_fold = List.fold (fun acc v -> min acc v) System.Int32.MaxValue lst
But thanks to partial function application you can omit one of them (as you did), or both of them:
let list_min_fold = List.fold min System.Int32.MaxValue lst
as always Tomas answer is spot on so I have but a small remark:
as you probably saw it makes no sense to try to find the minimum of an empty list (so the function probably should be of type 'a option and when you have an non-empty list it's very easy to use List.reduce (which is basically just a fold for binary operations and min is a great candidate for such an operation):
let list_min xs =
match xs with
| [] -> None
| _ -> List.reduce min xs
|> Some
this way you get:
> list_min [2;1;5;3];;
val it : int option = Some 1
> list_min [2;1;5;3;0];;
val it : int option = Some 0
> list_min ([] : int list);;
val it : int option = None
ok it's a fair point that the question was about fold - so if it has to be exactly List.fold you can of course do (as TheInnerLight remarked):
let list_min xs =
match xs with
| [] -> None
| (x::xs) -> List.fold min x xs
|> Some

how to make these simple functions tail recursive in f#

I have these these two functions
//Remove all even indexed elements from a list and return the rest
let rec removeEven l =
match l with
| x0::x1::xs -> x1::removeEven (xs)
| [] -> []
| [_] -> []
//combine list members into pairs
let rec combinePair l =
match l with
| x0::x1::xs -> (x0,x1) :: combinePair(xs)
| [] -> []
| [_] -> []
That work.
But I thought now that I was at it that I might as well learn a bit about tail recursion which I'm having a hard time getting the grasp of.
That's why I thought that if I could get some help making functions I had made myself tail-recursive perhaps it would become more clear how it works, instead of reading an example somewhere which I might not understand as well as my own code (remember, I'm a complete f# newbie :))
Any other constructive comments about my code are of course most welcome!
A typical way of making functions tail-recursive in F# is using a list (acc in this case) to accumulate results and reversing it to get the correct order:
let removeEven l =
let rec loop xs acc =
match xs with
| [] | [_] -> acc
| _::x1::xs' -> loop xs' (x1::acc)
loop l [] |> List.rev
let combinePair l =
let rec loop xs acc =
match xs with
| [] | [_] -> acc
| x0::x1::xs' -> loop xs' ((x0, x1)::acc)
loop l [] |> List.rev
Since we simply return results after each recursive call of loop, these functions are tail-recursive.
Your functions look quite nice, but I still have several comments:
Indentation is important in F#. I would prefer match... with is a few spaces behind lec rec declaration.
Patter matching cases should follow a consistent order. It's a good idea to start with base cases first.
The function keyword is natural to use for shortening functions whenever you have a pattern of fun t -> match t with.
It's better to get rid of unnecessary parentheses, especially in functions with one argument.
Applying above comments, your functions become as follows:
// Remove all even indexed elements from a list and return the rest
let rec removeEven = function
| [] | [_] -> []
| _::x1::xs -> x1::removeEven xs
// Combine list members into pairs
let rec combinePair = function
| [] | [_] -> []
| x0::x1::xs -> (x0, x1)::combinePair xs
If you need a slower, less maintainable way to do it that uses more memory, you can use a continuation.
let removeEven items =
let rec loop f = function
| _::h::t -> loop (fun acc -> f (h::acc)) t
| [] | [_] -> f []
loop id items
But hey, it's tail-recursive.

How to partition a list with a given group size?

I'm looking for the best way to partition a list (or seq) so that groups have a given size.
for ex. let's say I want to group with size 2 (this could be any other number though):
let xs = [(a,b,c); (a,b,d); (y,z,y); (w,y,z); (n,y,z)]
let grouped = partitionBySize 2 input
// => [[(a,b,c);(a,b,d)]; [(y,z,y);(w,y,z)]; [(n,y,z)]]
The obvious way to implement partitionBySize would be by adding the position to every tuple in the input list so that it becomes
[(0,a,b,c), (1,a,b,d), (2,y,z,y), (3,w,y,z), (4,n,y,z)]
and then use GroupBy with
xs |> Seq.ofList |> Seq.GroupBy (function | (i,_,_,_) -> i - (i % n))
However this solution doesn't look very elegant to me.
Is there a better way to implement this function (maybe with a built-in function)?
This seems to be a repeating pattern that's not captured by any function in the F# core library. When solving similar problems earlier, I defined a function Seq.groupWhen (see F# snippets) that turns a sequence into groups. A new group is started when the predicate holds.
You could solve the problem using Seq.groupWhen similarly to Seq.group (by starting a new group at even index). Unlike with Seq.group, this is efficient, because Seq.groupWhen iterates over the input sequence just once:
[3;3;2;4;1;2;8]
|> Seq.mapi (fun i v -> i, v) // Add indices to the values (as first tuple element)
|> Seq.groupWhen (fun (i, v) -> i%2 = 0) // Start new group after every 2nd element
|> Seq.map (Seq.map snd) // Remove indices from the values
Implementing the function directly using recursion is probably easier - the solution from John does exactly what you need - but if you wanted to see a more general approach then Seq.groupWhen may be interesting.
List.chunkBySize (hat tip: Scott Wlaschin) is now available and does exactly what you're talking about. It appears to be new with F# 4.0.
let grouped = [1..10] |> List.chunkBySize 3
// val grouped : int list list =
// [[1; 2; 3]; [4; 5; 6]; [7; 8; 9]; [10]]
Seq.chunkBySize and Array.chunkBySize are also now available.
Here's a tail-recursive function that traverses the list once.
let chunksOf n items =
let rec loop i acc items =
seq {
match i, items, acc with
//exit if chunk size is zero or input list is empty
| _, [], [] | 0, _, [] -> ()
//counter=0 so yield group and continue looping
| 0, _, _::_ -> yield List.rev acc; yield! loop n [] items
//decrement counter, add head to group, and loop through tail
| _, h::t, _ -> yield! loop (i-1) (h::acc) t
//reached the end of input list, yield accumulated elements
//handles items.Length % n <> 0
| _, [], _ -> yield List.rev acc
}
loop n [] items
Usage
[1; 2; 3; 4; 5]
|> chunksOf 2
|> Seq.toList //[[1; 2]; [3; 4]; [5]]
I like the elegance of Tomas' approach, but I benchmarked both our functions using an input list of 10 million elements. This one clocked in at 9 secs vs 22 for his. Of course, as he admitted, the most efficient method would probably involve arrays/loops.
What about a recursive approach? - only requires a single pass
let rec partitionBySize length inp dummy =
match inp with
|h::t ->
if dummy |> List.length < length then
partitionBySize length t (h::dummy)
else dummy::(partitionBySize length t (h::[]))
|[] -> dummy::[]
Then invoke it with partitionBySize 2 xs []
let partitionBySize size xs =
let sq = ref (seq xs)
seq {
while (Seq.length !sq >= size) do
yield Seq.take size !sq
sq := Seq.skip size !sq
if not (Seq.isEmpty !sq) then yield !sq
}
// result to list, if you want
|> Seq.map (Seq.toList)
|> Seq.toList
UPDATE
let partitionBySize size (sq:seq<_>) =
seq {
let e = sq.GetEnumerator()
let empty = ref true;
while !empty do
yield seq { for i = 1 to size do
empty := e.MoveNext()
if !empty then yield e.Current
}
}
array slice version:
let partitionBySize size xs =
let xa = Array.ofList xs
let len = xa.Length
[
for i in 0..size..(len-1) do
yield ( if i + size >= len then xa.[i..] else xa.[i..(i+size-1)] ) |> Array.toList
]
Well, I was late for the party. The code below is a tail-recursive version using high-order functions on List:
let partitionBySize size xs =
let i = size - (List.length xs - 1) % size
let xss, _, _ =
List.foldBack( fun x (acc, ls, j) ->
if j = size then ((x::ls)::acc, [], 1)
else (acc, x::ls, j+1)
) xs ([], [], i)
xss
I did the same benchmark as Daniel did. This function is efficient while it is 2x faster than his approach on my machine. I also compared it with an array/loop version, they are comparable in terms of performance.
Moreover, unlike John's answer, this version preserves order of elements in inner lists.

Linked list partition function and reversed results

I wrote this F# function to partition a list up to a certain point and no further -- much like a cross between takeWhile and partition.
let partitionWhile c l =
let rec aux accl accr =
match accr with
| [] -> (accl, [])
| h::t ->
if c h then
aux (h::accl) t
else
(accl, accr)
aux [] l
The only problem is that the "taken" items are reversed:
> partitionWhile ((>=) 5) [1..10];;
val it : int list * int list = ([5; 4; 3; 2; 1], [6; 7; 8; 9; 10])
Other than resorting to calling rev, is there a way this function could be written that would have the first list be in the correct order?
Here's a continuation-based version. It's tail-recursive and returns the list in the original order.
let partitionWhileCps c l =
let rec aux f = function
| h::t when c h -> aux (fun (acc, l) -> f ((h::acc), l)) t
| l -> f ([], l)
aux id l
Here are some benchmarks to go along with the discussion following Brian's answer (and the accumulator version for reference):
let partitionWhileAcc c l =
let rec aux acc = function
| h::t when c h -> aux (h::acc) t
| l -> (List.rev acc, l)
aux [] l
let test =
let l = List.init 10000000 id
(fun f ->
let r = f ((>) 9999999) l
printfn "%A" r)
test partitionWhileCps // Real: 00:00:06.912, CPU: 00:00:07.347, GC gen0: 78, gen1: 65, gen2: 1
test partitionWhileAcc // Real: 00:00:03.755, CPU: 00:00:03.790, GC gen0: 52, gen1: 50, gen2: 1
Cps averaged ~7s, Acc ~4s. In short, continuations buy you nothing for this exercise.
I expect you can use continuations, but calling List.rev at the end is the best way to go.
I usually prefer Sequences over List as they are lazy and you got List.toSeq and Seq.toList functions to convert between them. Below is the implementation of your partitionWhile function using sequences.
let partitionWhile (c:'a -> bool) (l:'a list) =
let fromEnum (e:'a IEnumerator) =
seq { while e.MoveNext() do yield e.Current}
use e = (l |> List.toSeq).GetEnumerator()
(e |> fromEnum |> Seq.takeWhile c |> Seq.toList)
,(e |> fromEnum |> Seq.toList)
You can rewrite the function like this:
let partitionWhile c l =
let rec aux xs =
match xs with
| [] -> ([], [])
| h :: t ->
if c h then
let (good, bad) = aux t in
(h :: good, bad)
else
([], h :: t)
aux l
Yes, as Brian has noted it is no longer tail recursive, but it answers the question as stated. Incidentally, span in Haskell is implemented exactly the same way in Hugs:
span p [] = ([],[])
span p xs#(x:xs')
| p x = (x:ys, zs)
| otherwise = ([],xs)
where (ys,zs) = span p xs'
A good reason for preferring this version in Haskell is laziness: In the first version all the good elements are visited before the list is reversed. In the second version the first good element can be returned immediately.
I don't think I'm the only one to learn a lot from (struggling with) Daniel's CPS solution. In trying to figure it out, it helped me change several potentially (to the beginner) ambiguous list references, like so:
let partitionWhileCps cond l1 =
let rec aux f l2 =
match l2 with
| h::t when cond h -> aux (fun (acc, l3) -> f (h::acc, l3)) t
| l4 -> f ([], l4)
aux id l1
(Note that "[]" in the l4 match is the initial acc value.) I like this solution because it feels less kludgey not having to use List.rev, by drilling to the end of the first list and building the second list backwards. I think the other main way to avoid .rev would be to use tail recursion with a cons operation. Some languages optimize "tail recursion mod cons" in the same way as proper tail recursion (but Don Syme has said that this won't be coming to F#).
So this is not tail-recursive safe in F#, but it makes my answer an answer and avoids List.rev (this is ugly to have to access the two tuple elements and would be a more fitting parallel to the cps approach otherwise, I think, like if we only returned the first list):
let partitionWhileTrmc cond l1 =
let rec aux acc l2 =
match l2 with
| h::t when cond h -> ( h::fst(aux acc t), snd(aux acc t))
| l3 -> (acc, l3)
aux [] l1