let write_table_channel ch n =
iter
(fun x ->
iter
(fun i ->
output_string ch (string_of_int i);
output_string ch "\t")
(map (( * ) x) (numlist n));
output_string ch "\n")
(numlist n)
This function is a bit confusing, first of all nested functions are overall confusing but mainly if someone could outline in particular what the function is doing this would be very helpful the main issue is the idea of nested functions
These are not nested functions but anonymous functions. They are indeed confusing and it is a good style to use nested functions instead. Much like you, I am very confused with this function, so to understand it, I performed a series of simple rewritings. Every time I see a
... (fun <args> -> <body>) ...
I rewrite it with
let somefun <args> = <body> in
... somefun ...
In other words, I turn anonymous functions into named functions, and in the process try to give them more meaningful names. Let's do this in a series of steps,
We start with step 0, the initial function,
let write_table_channel ch n =
iter
(fun x ->
iter
(fun i ->
output_string ch (string_of_int i);
output_string ch "\t")
(map (( * ) x) (numlist n));
output_string ch "\n")
(numlist n)
Next, we see nested loops iterations, let's pick the outer loop and name it outer for starters,
let write_table_channel ch n =
let outer x =
iter
(fun i ->
output_string ch (string_of_int i);
output_string ch "\t")
(map (( * ) x) (numlist n));
output_string ch "\n" in
iter outer (numlist n)
Now let's extract the inner loop from the outer,
let write_table_channel ch n =
let inner i =
output_string ch (string_of_int i);
output_string ch "\t" in
let outer x =
iter inner (map (( * ) x) (numlist n));
output_string ch "\n" in
iter outer (numlist n)
Now we have small functions that are easy to understand, let's give them meaningful names,
let write_table_channel ch n =
let print_number i =
output_string ch (string_of_int i);
output_string ch "\t" in
let multiply_by_x_and_print x =
iter print_number (map (( * ) x) (numlist n));
output_string ch "\n" in
iter multiply_by_x_and_print (numlist n)
Now it is much easier to read, and, surprisingly not even bigger, the same 8 lines of code!
So what are anonymous functions, also called lambdas, and when they are useful? As their name suggests, anonymous functions are functions that do not have names and are defined in place. They are pretty useful in conjunction with simple (non-nested) iterators, especially when the function itself is small and self-describing, e.g.,
let print_ints ch x =
List.iter (fun i ->
output_string ch (string_of_int i);
output_string ch "\t") x
Since functions in OCaml are curried by default and using the apply operator ##, we can rewrite our function as,
let print_ints ch = List.iter ## fun i ->
output_string ch (string_of_int i);
output_string ch "\t"
Notice, that we get rid of the x argument and there are no more parentheses.
Proper indentation is important to following flow of code. I have edited the question to fix this.
It may also help to use let bindings to break out functionality. For instance, we can create a local function print rather than passing this anonymous function. See the "Naming anonymous functions" section of the OCaml Programming Guidelines.
(fun i ->
output_string ch (string_of_int i);
output_string ch "\t")
There are also repeated calls to numList n which can be factored out.
let write_table_channel ch n =
let print i =
output_string ch (string_of_int i);
output_string ch "\t"
in
let nums = (numList n) in
iter
(fun x ->
iter print (map (( * ) x) nums);
output_string ch "\n")
nums
Hopefully that's easier to digest.
Related
I'm reading an input file of several lines. Each line has the following format:
Greeting "hello"
Greeting " Good morning"
Sit
Smile
Question "How are you?"
My current can read each line into a string list. Then I process it using this function which is supposed to break it into a string list list:
let rec process (l : string list) (acc : string list list) : string list list =
match l with
| [] -> acc
| hd :: tl -> String.split_on_char ' ' hd :: (process tl acc)
Which, unfortunately, does not work, since it also splits spaces inside quotation marks. Anyone think of a the right way to do this, possibly using map or fold_left, etc? This would be my expected output:
[["Greeting"; "/"hello/""];[Greeting; "/" Good morning"];["Sit"]]
and so on. Thank you!
You want a real (but very simple) lexical analysis. IMHO this is beyond what you can do with simple string splitting.
A scanner takes a stream of characters and returns the next token it sees. You can make a string into a stream by having an index that traverses the string.
Here is a scanner that is roughly what you would want:
let rec scan s offset =
let slen = String.length s in
if offset >= slen then
None
else if s.[offset] = ' ' then
scan s (offset + 1)
else if s.[offset] = '"' then
let rec qlook loff =
if loff >= slen then
(* Unterminated quotation *)
let tok = String.sub s offset (slen - offset) in
Some (tok, slen)
else if s.[loff] = '"' then
let tok = String.sub s offset (loff - offset + 1) in
Some (tok, loff + 1)
else qlook (loff + 1)
in
qlook (offset + 1)
else
let rec wlook loff =
if loff >= slen then
let tok = String.sub s offset (slen - offset) in
Some (tok, slen)
else if s.[loff] = ' ' || s.[loff] = '"' then
let tok = String.sub s offset (loff - offset) in
Some (tok, loff)
else
wlook (loff + 1)
in
wlook (offset + 1)
It handles a few cases that you didn't specify: what to do if there is an unclosed quotation. What to do with something like abc"def ghi".
The scanner returns None at the end of the string, or Some (token, offset), i.e., the next token and the offset to continue scanning.
A recursive function to break up a string would look something like this:
let split s =
let rec isplit accum offset =
match scan s offset with
| None -> List.rev accum
| Some (tok, offset') -> isplit (tok :: accum) offset'
in
isplit [] 0
This can be visualized with a state machine. You have 2 main states: looking for ' ' and looking for '"'. Processing strings is ugly and you can't pattern match it. So first thing I did is turn the string into a char list. Implementing the two states then becomes simple:
let split s =
let rec split_space acc word = function
| [] -> List.rev (List.rev word::acc)
| ' '::xs -> split_space (List.rev word::acc) [] xs
| '"'::xs -> find_quote acc ('"'::word) xs
| x::xs -> split_space acc (x::word) xs
and find_quote acc word = function
| [] -> List.rev (List.rev word::acc)
| '"'::xs -> split_space acc ('"'::word) xs
| x::xs -> find_quote acc (x::word) xs
in
split_space [] [] s
;;
# split ['a';'b';' ';'"';'c';' ';'d';'"';' ';'e'];;
- : char list list = [['a'; 'b']; ['"'; 'c'; ' '; 'd'; '"']; ['e']]
Now if you want to do it with strings that's left to you. The Idea would be the same. Or you can just turn the char list list into a string list at the end.
I am newbie to SML, trying to write recursive program to delete chars from a string:
remCharR: char * string -> string
So far wrote this non-recursive prog. Need help to write recursive one.
- fun stripchars(string,chars) = let
= fun aux c =
= if String.isSubstring(str c) chars then
= ""
= else
= str c
= in
= String.translate aux string
= end
= ;
You have already found a very idiomatic way to do this. Explicit recursion is not a goal in itself, except perhaps in a learning environment. That is, explicit recursion is, compared to your current solution, encumbered with a description of the mechanics of how you achieve the result, but not what the result is.
Here is one way you can use explicit recursion by converting to a list:
fun remCharR (c, s) =
let fun rem [] = []
| rem (c'::cs) =
if c = c'
then rem cs
else c'::rem cs
in implode (rem (explode s)) end
The conversion to list (using explode) is inefficient, since you can iterate the elements of a string without creating a list of the same elements. Generating a list of non-removed chars is not necessarily a bad choice, though, since with immutable strings, you don't know exactly how long your end-result is going to be without first having traversed the string. The String.translate function produces a list of strings which it then concatenates. You could do something similar.
So if you replace the initial conversion to list with a string traversal (fold),
fun fold_string f e0 s =
let val max = String.size s
fun aux i e =
if i < max
then let val c = String.sub (s, i)
in aux (i+1) (f (c, e))
end
else e
in aux 0 e0 end
you could then create a string-based filter function (much alike the String.translate function you already found, but less general):
fun string_filter p s =
implode (fold_string (fn (c, res) => if p c then c::res else res) [] s)
fun remCharR (c, s) =
string_filter (fn c' => c <> c') s
Except, you'll notice, it accidentally reverses the string because it folds from the left; you can fold from the right (efficient, but different semantics) or reverse the list (inefficient). I'll leave that as an exercise for you to choose between and improve.
As you can see, in avoiding String.translate I've built other generic helper functions so that the remCharR function does not contain explicit recursion, but rather depends on more readable high-level functions.
Update: String.translate actually does some pretty smart things wrt. memory use.
Here is Moscow ML's version of String.translate:
fun translate f s =
Strbase.translate f (s, 0, size s);
with Strbase.translate looking like:
fun translate f (s,i,n) =
let val stop = i+n
fun h j res = if j>=stop then res
else h (j+1) (f(sub_ s j) :: res)
in revconcat(h i []) end;
and with the helper function revconcat:
fun revconcat strs =
let fun acc [] len = len
| acc (v1::vr) len = acc vr (size v1 + len)
val len = acc strs 0
val newstr = if len > maxlen then raise Size else mkstring_ len
fun copyall to [] = () (* Now: to = 0. *)
| copyall to (v1::vr) =
let val len1 = size v1
val to = to - len1
in blit_ v1 0 newstr to len1; copyall to vr end
in copyall len strs; newstr end;
So it first calculates the total length of the final string by summing the length of each sub-string generated by String.translate, and then it uses compiler-internal, mutable functions (mkstring_, blit_) to copy the translated strings into the final result string.
You can achieve a similar optimization when you know that each character in the input string will result in 0 or 1 characters in the output string. The String.translate function can't, since the result of a translate can be multiple characters. So an alternative implementation uses CharArray. For example:
Find the number of elements in the new string,
fun countP p s =
fold_string (fn (c, total) => if p c
then total + 1
else total) 0 s
Construct a temporary, mutable CharArray, update it and convert it to string:
fun string_filter p s =
let val newSize = countP p s
val charArr = CharArray.array (newSize, #"x")
fun update (c, (newPos, oldPos)) =
if p c
then ( CharArray.update (charArr, newPos, c) ; (newPos+1, oldPos+1) )
else (newPos, oldPos+1)
in fold_string update (0,0) s
; CharArray.vector charArr
end
fun remCharR (c, s) =
string_filter (fn c' => c <> c') s
You'll notice that remCharR is the same, only the implementation of string_filter varied, thanks to some degree of abstraction. This implementation uses recursion via fold_string, but is otherwise comparable to a for loop that updates the index of an array. So while it is recursive, it's also not very abstract.
Considering that you get optimizations comparable to these using String.translate without the low-level complexity of mutable arrays, I don't think this is worthwhile unless you start to experience performance problems.
I'm used to JaneStreet's Core library. Its List module has a neat init function:
List.init;;
- : int -> f:(int -> 'a) -> 'a list = <fun>
It allows you to create a list with using a custom function to initialize elements:
List.init 5 ~f:(Fn.id);;
- : int list = [0; 1; 2; 3; 4]
List.init 5 ~f:(Int.to_string);;
- : string list = ["0"; "1"; "2"; "3"; "4"]
However, this function doesn't seem to exist in Pervasives, which is sad. Am I missing something, or do I have to implement it myself? And if I do need to write it, how do I achieve this?
EDIT:
I have written an imperative version of init, but it doesn't feel right to have to resort to OCaml's imperative features in such a case. :(
let init n ~f =
let i = ref 0 in
let l = ref [] in
while !i < n do
l := (f !i) :: !l;
incr i;
done;
List.rev !l
;;
EDIT 2:
I've opened a pull request on OCaml's GitHub to have this feature included.
EDIT 3:
The feature was released in OCaml 4.06.
A recursive implementation is fairly straightforward. However, it is not tail-recursive, which means that you'll risk a stack overflow for large lists:
let init_list n ~f =
let rec init_list' i n f =
if i >= n then []
else (f i) :: (init_list' (i+1) n f)
in init_list' 0 n f
We can transform it into a tail-recursive version using the usual techniques:
let init_list n ~f =
let rec init_list' acc i n f =
if i >= n then acc
else init_list' ((f i) :: acc) (i+1) n f
in List.rev (init_list' [] 0 n f)
This uses an accumulator and also needs to reverse the intermediate result, as the list is constructed in reverse. Note that we could also use f (n-i-1) instead of f i to avoid reversing the list, but this may lead to unexpected behavior if f has side-effects.
An alternative and shorter solution is to simply use Array.init as a starting point:
let init_list n ~f = Array.(init n f |> to_list)
You can copy the code from JaneStreet and use it.
The code look's like (but not exactly the same) :
let init n ~f =
if n < 0 then raise (Invalid_argument "init");
let rec loop i accum =
if i = 0 then accum
else loop (i-1) (f (i-1) :: accum)
in
loop n []
;;
You can find the original code inside core_list0.ml from the package core_kernel.
OUnit.assert_equal ~pp_diff allows pretty-printing of expected/actual value differences and OUnitDiff seems to provide differs for collections.
Is there a stock pp_diff for string values though? Ideally one that makes a best-effort to expand diffs to the closest UTF-8 sequence boundaries.
Even common prefix/suffix elimination would be better than nothing.
An amusing morning challenge.
type move = Same | Add | Del
let edit_distance_matrix a b =
(* The usual dynamic edit distance algorithm, except we keep
a complete matrix of moves to be able to step back and see which
operations can turn [sa] into [sb].
This is not very efficient: we keep the complete matrices of
distances (costs) and moves. One doesn't need to know the move
for all cases of the matrix, only those that are on the "best"
path from begin to end; it would be better to recompute the moves
along the path after the facts. There probably also exists
a classic clever trick to apply the usual optimization of keeping
only two rows of the matrix at any time, and still compute the
best path along the way.
*)
let la, lb = String.length a, String.length b in
let m = Array.make_matrix (la + 1) (lb + 1) (-1) in
let moves = Array.make_matrix (la + 1) (lb + 1) Same in
m.(0).(0) <- 0;
for i = 1 to la do
m.(i).(0) <- i;
done;
for j = 1 to lb do
m.(0).(j) <- j;
done;
for i = 1 to la do
for j = 1 to lb do
let best, move =
if a.[i-1] = b.[j-1] then m.(i-1).(j-1), Same
else
if m.(i-1).(j) <= m.(i).(j-1)
then m.(i-1).(j) + 1, Del
else m.(i).(j-1) + 1, Add
in
m.(i).(j) <- best;
moves.(i).(j) <- move;
done;
done;
m, moves
let get m (i, j) = m.(i).(j)
let valid m pos =
fst pos >= 0 && snd pos >= 0
let previous (i, j) = function
| Same -> (i - 1, j - 1)
| Add -> (i, j - 1)
| Del -> (i - 1, j)
let cons _pos action = function
| (action', n) :: rest when action = action' ->
(action', n+1) :: rest
| list -> (action, 1) :: list
(** walk back along the "best path", taking notes of changes to make
as we go *)
let chunks moves =
let la = Array.length moves - 1 in
let lb = Array.length moves.(0) - 1 in
let start = (la, lb) in
let rec loop acc pos =
let move = get moves pos in
let next_pos = previous pos move in
(* if the next position is not valid,
the current move is a dummy move,
and it must not be returned as part of [acc] *)
if not (valid moves next_pos) then acc
else loop (cons pos move acc) next_pos
in loop [] start
(** print the list of changes in term of the original string
We skip large parts of the string that are common, keeping only
[context] characters on the sides to provide some context.
*)
let diff context sa sb =
let cost, moves = edit_distance_matrix sa sb in
let chks = chunks moves in
let buf = Buffer.create cost.(String.length sa).(String.length sb) in
let rec loop i j = function
| [] -> ()
| (Same, n) :: rest ->
if n <= 2 * context then
Buffer.add_substring buf sa i n
else begin
Buffer.add_substring buf sa i context;
Buffer.add_string buf "...\n...";
Buffer.add_substring buf sa (i + n - context) context;
end;
loop (i + n) (j + n) rest
| (Add, n) :: rest ->
begin
Buffer.add_string buf "[+";
Buffer.add_substring buf sb j n;
Buffer.add_char buf ']';
end;
loop i (j + n) rest
| (Del, n) :: rest ->
begin
Buffer.add_string buf "[-";
Buffer.add_substring buf sa i n;
Buffer.add_char buf ']';
end;
loop (i + n) j rest
in
begin
try loop 0 0 chks with _ -> ()
end;
Buffer.contents buf
Test:
# print_endline ## diff 4
"le gros chat mange beaucoup de croquettes au saumon"
"le chat maigre mange peu de croquettes au saumon"
;;
le[- gros] chat[+ maigre] mange [+p][-b]e[-auco]u[-p] de ...
...umon
I want to write a function that taking a string and return a list of char. Here is a function, but I think it is not do what I want ( I want to take a string and return a list of characters).
let rec string_to_char_list s =
match s with
| "" -> []
| n -> string_to_char_list n
Aside, but very important:
Your code is obviously wrong because you have a recursive call for which all the parameters are the exact same one you got in. It is going to induce an infinite sequence of calls with the same values in, thus looping forever (a stack overflow won't happen in tail-rec position).
The code that does what you want would be:
let explode s =
let rec exp i l =
if i < 0 then l else exp (i - 1) (s.[i] :: l) in
exp (String.length s - 1) []
Source:
http://caml.inria.fr/pub/old_caml_site/FAQ/FAQ_EXPERT-eng.html#strings
Alternatively, you can choose to use a library: batteries String.to_list or extlib String.explode
Try this:
let explode s = List.init (String.length s) (String.get s)
Nice and simple:
let rec list_car ch =
match ch with
| "" -> []
| ch -> String.get ch 0 :: list_car (String.sub ch 1 (String.length ch - 1));;
How about something like this:
let string_to_list str =
let rec loop i limit =
if i = limit then []
else (String.get str i) :: (loop (i + 1) limit)
in
loop 0 (String.length str);;
let list_to_string s =
let rec loop s n =
match s with
[] -> String.make n '?'
| car :: cdr ->
let result = loop cdr (n + 1) in
String.set result n car;
result
in
loop s 0;;
As of OCaml 4.07 (released 2018), this can be straightforwardly accomplished with sequences.
let string_to_char_list s =
s |> String.to_seq |> List.of_seq
Here is an Iterative version to get a char list from a string:
let string_to_list s =
let l = ref [] in
for i = 0 to String.length s - 1 do
l := (!l) # [s.[i]]
done;
!l;;
My code, suitable for modern OCaml:
let charlist_of_string s =
let rec trav l i =
if i = l then [] else s.[i]::trav l (i+1)
in
trav (String.length s) 0;;
let rec string_of_charlist l =
match l with
[] -> ""
| h::t -> String.make 1 h ^ string_of_charlist t;;