How to convert such a lisp function to ocaml? - ocaml

The function "good-red" used to calculate the 18 highest frequency numbers from a file ssqHitNum.txt.
(defun good-red ()
(let ((tab (make-hash-table)) (res '()) (nums) (sort-res))
(dotimes (i 33) (setf (gethash (+ i 1) tab) 0))
(with-open-file (stream "ssqHitNum.txt")
(loop :for line = (read-line stream nil)
:until (null line)
:do
(setq nums (butlast (str2lst (subseq line 6))))
(dolist (n nums) (incf (gethash n tab)))
))
(maphash #'(lambda (k v) (push (cons k v) res)) tab)
(setq sort-res (sort res #'> :key #'cdr))
;(print sort-res)
(subseq (mapcar #'car sort-res) 0 18)))
$head ssqHitNum.txt
10000 7 12 18 19 22 28 4
10000 16 17 23 26 31 32 11
10000 3 4 18 22 24 29 11
10000 4 9 10 18 29 32 8
10000 5 7 10 14 17 25 11
The number is between 1 and 33. Whether or not i use a hashtab and scan the file line by line as the Common Lisp code does in ocaml ? Or there is more elegant way using ocaml ?
Any suggestion is appreciated !

FWIW, in F# 3.0 you can just write it like this:
System.IO.File.ReadLines #"ssqHitNum.txt"
|> Seq.collect (fun s -> s.Split ' ' |> Seq.skip 1)
|> Seq.countBy id
|> Seq.sortBy (fun (_, p) -> -p)
|> Seq.take 18
In OCaml, I'd start by writing these useful library functions from scratch:
let readAllLines file =
let lines = ref [] in
let input = open_in file in
begin
try
while true do
lines := input_line input :: !lines
done
with
| End_of_file ->
close_in input
end;
List.rev !lines
let collect f xs =
List.concat (List.map f xs)
let countBy f xs =
let counts = Hashtbl.create 100 in
let find key = try Hashtbl.find counts (f key) with Not_found -> 0 in
let add key = Hashtbl.replace counts (f key) (1 + find key) in
List.iter add xs;
Hashtbl.fold (fun k n kns -> (k, n)::kns) table []
let sortBy f xs =
List.sort (fun x y -> compare (f x) (f y)) xs
let rec truncate n xs =
match n, xs with
| 0, _ | _, [] -> []
| n, x::xs -> x::truncate (n-1) xs
let rec skip n xs =
match n, xs with
| 0, xs -> xs
| n, [] -> []
| n, _::xs -> skip (n-1) xs
let (|>) x f = f x
let id x = x
and then write it the same way:
readAllLines "ssqHitNum.txt"
|> collect (fun s -> split ' ' s |> skip 1)
|> countBy id
|> sortBy (fun (_, p) -> -p)
|> truncate 18
The F# is still better because the lines are read on-demand whereas my OCaml reads everything into memory up-front.

From a similarly high-level point of view, using Batteries:
open Batteries_uni
let freq file best_n =
let table = Hashtbl.create 100 in
let freq_num num =
Hashtbl.replace table num (1 + Hashtbl.find_default table num 0) in
let freq_line line =
let nums = List.tl (String.nsplit line " ") in
List.iter freq_num nums in
Enum.iter freq_line (File.lines_of file);
let cmp (_,freq1) (_,freq2) = (* decreasing *) compare freq2 freq1 in
Hashtbl.enum table |> List.of_enum
|> List.sort ~cmp
|> List.take best_n
To test, from the toplevel:
#use "topfind";;
#require "batteries";;
#use "/tmp/test.ml";;
test "/tmp/test.txt" 18;;

As asked by z_axis, here is another solution using only the base library distributed with the OCaml compiler. It is a bit more verbose due to the absence of some convenience functions.
let freq file best_n =
let table = Hashtbl.create 100 in
let freq_num num =
Hashtbl.replace table num
(1 + try Hashtbl.find table num with Not_found -> 0) in
begin
let input = open_in file in
try while true do
let line = input_line input in
let nums = List.tl (Str.split (Str.regexp " +") line) in
List.iter freq_num nums
done with End_of_file -> close_in input
end;
let sorted =
let cmp (_,freq1) (_,freq2) = (* decreasing *) compare freq2 freq1 in
List.sort cmp (Hashtbl.fold (fun k x li -> (k,x)::li) table []) in
(* take not tail-rec, not a problem for small n such as n=18 *)
let rec take n = function
| li when n = 0 -> []
| [] -> []
| hd::tl -> hd :: take (n - 1) tl in
take best_n sorted
The regexp module Str is not linked by default, even if it is in the default search path, so you must explicitly compile the program with str.cma (for ocamlc) or str.cmxa (for ocamlopt). In the toplevel, #use "topfind";; then #require "str";; will do.

With a fixed set of small integers it may be simpler to use an array:
let good_red () =
let a = Array.make 33 0 in
let bump i = a.(i-1) <- a.(i-1) + 1 in
let rec iter_lines fh =
try
let words = Str.split (Str.regexp " +") (input_line fh) in
List.iter bump (List.map int_of_string (List.tl words));
iter_lines fh
with End_of_file -> () in
let fh = open_in "ssqHitNum.txt" in
iter_lines fh;
close_in fh;
let b = Array.mapi (fun i freq -> (i+1,freq)) a in
Array.sort (fun (i1,f1) (i2,f2) -> compare f2 f1) b;
Array.sub b 0 18;;
try
Array.iter (fun (i,freq) -> Printf.printf "%2d %2d\n" freq i) (good_red ())
with Invalid_argument _ -> print_endline "bad input"
As gasche mentions you need to compile with str.cma or str.cmxa.

I'm not sure to understand the problem you want to solve. (Why all your input lines start with 10000?)
If you just want to find the 18-th highest frequency numbers, you don't need to read line by line (and this is true in Lisp, in C, in Ocaml, ...), and Ocaml's Scanf.scanf "%d" (fun x -> ...) could do the input.
And using an Hashtbl.t is sensible in Ocaml.

Related

Why this program dosen't halt when only need to iterating throught a finite stream?

I'm trying to get a list of primes of two digits by running these codes in LearnOcaml. The codes compile if I restrict the parameter of the listify method, which returns a list from a stream, to be less than 20. Otherwise, it either never halt or return "Exception: Js_of_ocaml__Js.Error _.". I don't think the code is semantically wrong. So I'm
wondering if anyone can help resolve the problem?
type 'a stream = Eos | StrCons of 'a*(unit -> 'a stream)
(*integers from n onwards*)
let rec nums_from n =
StrCons(n,fun () -> nums_from (n+1))
let rec filterStr (test : 'a -> bool) (s: 'a stream) =
match s with
|Eos -> Eos
|StrCons(q,w) -> if test q then StrCons(q,fun ()-> filterStr test (w ()))
else filterStr test (w ())
(*Remove all numbers mod p*)
let sift p =
filterStr (fun x -> x mod p <> 0)
(*Sieves*)
let rec sieves s =
match s with
|Eos ->Eos
|StrCons(x,g) -> StrCons(x, fun ()-> sieves (sift x (g ())))
(*primes*)
let allprimes = sieves (nums_from 2)
let rec listify s n=
if n =0 then [] else
match s with
|Eos -> []
|StrCons(q,w) -> q::(listify (w ()) (n-1))
let twodigitsprimes = filterStr (fun x -> x > 10&& x<100) allprimes
let twodigitsprimeslist= listify twodigitsprimes 21
It appears that filterStr is looping while trying to create the StrCons that represents the next element after the 21st. Since there are only 21 2-digit primes, this will loop forever.
Note that when listify is called with n = 0, the StrCons has already been constructed; it just isn't examined. But the StrCons for this case diverges (and OCaml is a strict language).
You can get things to work using this version of listify:
let rec listify s n =
if n = 0 then []
else
match s with
| Eos -> []
| StrCons (q, w) ->
if n = 1 then [q] else q :: listify (w ()) (n - 1)

number of 5-digits numbers with no repeating digits bigger than 12345

I'm a beginner in OCaml and algorithms.
I'm trying to get the number of 5 digits numbers with no repeating digits bigger than 12345.
Here is what I did in OCaml, I tried to make as tail recursive as possible, and I also used streams. But still, due to size, it stack overflowed:
type 'a stream = Eos | StrCons of 'a * (unit -> 'a stream)
let rec numberfrom n= StrCons (n, fun ()-> numberfrom (n+1))
let nats = numberfrom 1
let rec listify st n f=
match st with
|Eos ->f []
|StrCons (m, a) ->if n=1 then f [m] else listify (a ()) (n-1) (fun y -> f (m::y))
let rec filter (test: 'a-> bool) (s: 'a stream) : 'a stream=
match s with
|Eos -> Eos
|StrCons(q,w) -> if test q then StrCons(q, fun ()->filter test (w ()))
else filter test (w ())
let rec check_dup l=
match l with
| [] -> false
| h::t->
let x = (List.filter (fun x -> x = h) t) in
if (x == []) then
check_dup t
else
true;;
let digits2 d =
let rec dig acc d =
if d < 10 then d::acc
else dig ((d mod 10)::acc) (d/10) in
dig [] d
let size a=
let rec helper n aa=
match aa with
|Eos-> n
|StrCons (q,w) -> helper (n+1) (w())
in helper 0 a
let result1 = filter (fun x -> x<99999 && x>=12345 && (not (check_dup (digits2 x)))) nats
(* unterminating : size result1 *)
(*StackOverflow: listify result1 10000 (fun x->x) *)
I can't reproduce your reported problem. When I load up your code I see this:
# List.length (listify result1 10000 (fun x -> x));;
- : int = 10000
# List.length (listify result1 26831 (fun x -> x));;
- : int = 26831
It's possible your system is more resource constrained than mine.
Let me just say that the usual way to code a tail recursive function is to build the list up in reverse, then reverse it at the end. That might look something like this:
let listify2 st n =
let rec ilist accum st k =
match st with
| Eos -> List.rev accum
| StrCons (m, a) ->
if k = 1 then List.rev (m :: accum)
else ilist (m :: accum) (a ()) (k - 1)
in
if n = 0 then []
else ilist [] st n
You still have the problem that listify doesn't terminate if you ask for more elements than there are in the stream. It might be better to introduce a method to detect the end of the stream and return Eos at that point. For example, the filter function might accept a function that returns three possible values (the element should be filtered out, the element should not be filtered out, the stream should end).
The problem is that the size of your stream result1 is undefined.
Indeed, nats is an never-ending stream: it never returns Eos.
However, filtering a never-ending stream results in another never-ending stream
since a filtered stream only returns Eos after the underlying stream does so:
let rec filter (test: 'a-> bool) (s: 'a stream) : 'a stream=
match s with
| Eos -> Eos
| StrCons(q,w) -> if test q then StrCons(q, fun ()->filter test (w ()))
else filter test (w ())
Consequently, size result1 is stuck trying to reach the end of integers.
Note also that, in recent version of the standard library, your type stream is called Seq.node.

merging 2 lazy lists type conflict

Why my merge function complains about its type ?
Isn't my x a type 'a seq ?
type 'a seq = Stop | Cons of 'a * (unit -> 'a seq)
let rec linear start step= (*builds a seq starting with 'start'*)
Cons (start, fun () -> linear (start+step) step)
let rec take n seq = match seq with (*take first n elem from seq*)
| Stop -> []
| Cons (a, f) -> if n < 1 then [] else a::(take (n-1) (f ()))
let rec merge seq1 seq2 = match seq1, seq2 with
| Stop, _ -> seq2
| _, Stop -> seq1
| Cons(h1, tf1), _ as x ->
Cons(h1, fun () -> merge (x) (tf1 ()))
let l1 = linear 1 1
let l2 = linear 100 100
let l3 = interleave l1 l2
I would like to see the right result for
take 10 l3
int list = [1; 100; 2; 200; 3; 300; 4; 400; 5; 500]
Another way to write my function (which works) would be
let rec merge seq1 seq2 = match seq1 with
| Stop -> Stop
| Cons (h, tf) -> Cons(h, fun () -> merge seq2 (tf ()))
but I don't get it , why the first merge doesn't work.
Thanks.
Just write (_ as x) because here, your as x catches the whole pattern.
So, what you see as :
| Cons(h1, tf1), (_ as x) -> ...
is actually parsed as
| (Cons(h1, tf1), _) as x -> ...
And you could actually write :
| Cons(h1, tf1), x -> ...
Which is far better ;-)
Or even
| Cons(h1, tf1), _ -> Cons(h1, fun () -> merge seq2 (tf1 ()))

Memoization list Ocaml

I have a recursive function and I want the rewriting in the Mémoïsant
My recursive function:
let rec sum_cube l =
match l with
| [] -> 0
| x :: s -> (x * x * x) + sum_cube s
and I tried with this:
let memo = Hashtbl.create 17
let rec sum_cub_memo l =
try
Hashtbl.find memo l
with Not_found ->
let fn = function
| [] -> 0
| x::s -> (x * x * x ) sum_cub_memo s
in
Hashtbl.add memo l fn
fn ;;
I have an error:
This expression has type int list -> int but an expression was expected of type int list!!
You should memoize not the function, but the result of the function, e.g., using your definition of sum_cube:
let sum_cube_memo xs =
try Hashtbl.find memo xs with Not_found ->
let res = sum_cube xs in
Hashtbl.add memo xs res;
res
This will work, however there is a caveat. You're using a list of integers as a key. That means, that first the key is transformed to its hash (basically O(n), and will take basically the same amount of time as computing the power of three), second, if there is a hash collision, then every list in the bucket will be compared with the argument list. As a result, your memoized function has the same complexity as your non-memoized function, it has worse performance, and also consumes unbound amount of memory. Is it worthwhile?
sum_cube without memorization.
let sum_cube l =
let cube x =x*x*x in
List.fold_left ( fun acc x -> acc+cube x) 0 l
sum_cube with memorization and trace.
let sum_cube l =
let memo = Hashtbl.create 17 in
let cube_memo x =
try
let xcube= Hashtbl.find memo x in
Printf.printf "find %d -> %d\n" x xcube;
xcube
with Not_found ->
let xcube=x*x*x in
Printf.printf "add %d -> %d\n" x xcube;
Hashtbl.add memo x xcube;
xcube
in
List.fold_left ( fun acc x -> acc+cube_memo x) 0 l
Test :
# sum_cube [4;4;2;3;4;2];;
add 4 -> 64
find 4 -> 64
add 2 -> 8
add 3 -> 27
find 4 -> 64
find 2 -> 8
- : int = 235

Most elegant combinations of elements in F#

One more question about most elegant and simple implementation of element combinations in F#.
It should return all combinations of input elements (either List or Sequence).
First argument is number of elements in a combination.
For example:
comb 2 [1;2;2;3];;
[[1;2]; [1;2]; [1;3]; [2;2]; [2;3]; [2;3]]
One less concise and more faster solution than ssp:
let rec comb n l =
match n, l with
| 0, _ -> [[]]
| _, [] -> []
| k, (x::xs) -> List.map ((#) [x]) (comb (k-1) xs) # comb k xs
let rec comb n l =
match (n,l) with
| (0,_) -> [[]]
| (_,[]) -> []
| (n,x::xs) ->
let useX = List.map (fun l -> x::l) (comb (n-1) xs)
let noX = comb n xs
useX # noX
There is more consise version of KVB's answer:
let rec comb n l =
match (n,l) with
| (0,_) -> [[]]
| (_,[]) -> []
| (n,x::xs) ->
List.flatten [(List.map (fun l -> x::l) (comb (n-1) xs)); (comb n xs)]
The accepted answer is gorgeous and quickly understandable if you are familiar with tree recursion. Since elegance was sought, opening this long dormant thread seems somewhat unnecessary.
However, a simpler solution was asked for. Iterative algorithms sometimes seem simpler to me. Furthermore, performance was mentioned as an indicator of quality, and iterative processes are sometimes faster than recursive ones.
The following code is tail recursive and generates an iterative process. It requires a third of the amount of time to compute combinations of size 12 from a list of 24 elements.
let combinations size aList =
let rec pairHeadAndTail acc bList =
match bList with
| [] -> acc
| x::xs -> pairHeadAndTail (List.Cons ((x,xs),acc)) xs
let remainderAfter = aList |> pairHeadAndTail [] |> Map.ofList
let rec comboIter n acc =
match n with
| 0 -> acc
| _ ->
acc
|> List.fold (fun acc alreadyChosenElems ->
match alreadyChosenElems with
| [] -> aList //Nothing chosen yet, therefore everything remains.
| lastChoice::_ -> remainderAfter.[lastChoice]
|> List.fold (fun acc elem ->
List.Cons (List.Cons (elem,alreadyChosenElems),acc)
) acc
) []
|> comboIter (n-1)
comboIter size [[]]
The idea that permits an iterative process is to pre-compute a map of the last chosen element to a list of the remaining available elements. This map is stored in remainderAfter.
The code is not concise, nor does it conform to lyrical meter and rhyme.
A naive implementation using sequence expression. Personally I often feel sequence expressions are easier to follow than other more dense functions.
let combinations (k : int) (xs : 'a list) : ('a list) seq =
let rec loop (k : int) (xs : 'a list) : ('a list) seq = seq {
match xs with
| [] -> ()
| xs when k = 1 -> for x in xs do yield [x]
| x::xs ->
let k' = k - 1
for ys in loop k' xs do
yield x :: ys
yield! loop k xs }
loop k xs
|> Seq.filter (List.length >> (=)k)
Method taken from Discrete Mathematics and Its Applications.
The result returns an ordered list of combinations stored in arrays.
And the index is 1-based.
let permutationA (currentSeq: int []) (n:int) (r:int): Unit =
let mutable i = r
while currentSeq.[i - 1] = n - r + i do
i <- (i - 1)
currentSeq.[i - 1] <- currentSeq.[i - 1] + 1
for j = i + 1 to r do
currentSeq.[j - 1] <- currentSeq.[i - 1] + j - i
()
let permutationNum (n:int) (r:int): int [] list =
if n >= r then
let endSeq = [|(n-r+1) .. n|]
let currentSeq: int [] = [|1 .. r|]
let mutable resultSet: int [] list = [Array.copy currentSeq];
while currentSeq <> endSeq do
permutationA currentSeq n r
resultSet <- (Array.copy currentSeq) :: resultSet
resultSet
else
[]
This solution is simple and helper function costs constant memory.
My solution is less concise, less effective (altho, no direct recursion used) but it trully returns all combinations (currently only pairs, need to extend filterOut so it can return a tuple of two lists, will do little later).
let comb lst =
let combHelper el lst =
lst |> List.map (fun lstEl -> el::[lstEl])
let filterOut el lst =
lst |> List.filter (fun lstEl -> lstEl <> el)
lst |> List.map (fun lstEl -> combHelper lstEl (filterOut lstEl lst)) |> List.concat
comb [1;2;3;4] will return:
[[1; 2]; [1; 3]; [1; 4]; [2; 1]; [2; 3]; [2; 4]; [3; 1]; [3; 2]; [3; 4]; [4; 1]; [4; 2]; [4; 3]]
Ok, just tail combinations little different approach (without using of library function)
let rec comb n lst =
let rec findChoices = function
| h::t -> (h,t) :: [ for (x,l) in findChoices t -> (x,l) ]
| [] -> []
[ if n=0 then yield [] else
for (e,r) in findChoices lst do
for o in comb (n-1) r do yield e::o ]