string difference pretty-printer for OUnit.assert_equal - ocaml

OUnit.assert_equal ~pp_diff allows pretty-printing of expected/actual value differences and OUnitDiff seems to provide differs for collections.
Is there a stock pp_diff for string values though? Ideally one that makes a best-effort to expand diffs to the closest UTF-8 sequence boundaries.
Even common prefix/suffix elimination would be better than nothing.

An amusing morning challenge.
type move = Same | Add | Del
let edit_distance_matrix a b =
(* The usual dynamic edit distance algorithm, except we keep
a complete matrix of moves to be able to step back and see which
operations can turn [sa] into [sb].
This is not very efficient: we keep the complete matrices of
distances (costs) and moves. One doesn't need to know the move
for all cases of the matrix, only those that are on the "best"
path from begin to end; it would be better to recompute the moves
along the path after the facts. There probably also exists
a classic clever trick to apply the usual optimization of keeping
only two rows of the matrix at any time, and still compute the
best path along the way.
*)
let la, lb = String.length a, String.length b in
let m = Array.make_matrix (la + 1) (lb + 1) (-1) in
let moves = Array.make_matrix (la + 1) (lb + 1) Same in
m.(0).(0) <- 0;
for i = 1 to la do
m.(i).(0) <- i;
done;
for j = 1 to lb do
m.(0).(j) <- j;
done;
for i = 1 to la do
for j = 1 to lb do
let best, move =
if a.[i-1] = b.[j-1] then m.(i-1).(j-1), Same
else
if m.(i-1).(j) <= m.(i).(j-1)
then m.(i-1).(j) + 1, Del
else m.(i).(j-1) + 1, Add
in
m.(i).(j) <- best;
moves.(i).(j) <- move;
done;
done;
m, moves
let get m (i, j) = m.(i).(j)
let valid m pos =
fst pos >= 0 && snd pos >= 0
let previous (i, j) = function
| Same -> (i - 1, j - 1)
| Add -> (i, j - 1)
| Del -> (i - 1, j)
let cons _pos action = function
| (action', n) :: rest when action = action' ->
(action', n+1) :: rest
| list -> (action, 1) :: list
(** walk back along the "best path", taking notes of changes to make
as we go *)
let chunks moves =
let la = Array.length moves - 1 in
let lb = Array.length moves.(0) - 1 in
let start = (la, lb) in
let rec loop acc pos =
let move = get moves pos in
let next_pos = previous pos move in
(* if the next position is not valid,
the current move is a dummy move,
and it must not be returned as part of [acc] *)
if not (valid moves next_pos) then acc
else loop (cons pos move acc) next_pos
in loop [] start
(** print the list of changes in term of the original string
We skip large parts of the string that are common, keeping only
[context] characters on the sides to provide some context.
*)
let diff context sa sb =
let cost, moves = edit_distance_matrix sa sb in
let chks = chunks moves in
let buf = Buffer.create cost.(String.length sa).(String.length sb) in
let rec loop i j = function
| [] -> ()
| (Same, n) :: rest ->
if n <= 2 * context then
Buffer.add_substring buf sa i n
else begin
Buffer.add_substring buf sa i context;
Buffer.add_string buf "...\n...";
Buffer.add_substring buf sa (i + n - context) context;
end;
loop (i + n) (j + n) rest
| (Add, n) :: rest ->
begin
Buffer.add_string buf "[+";
Buffer.add_substring buf sb j n;
Buffer.add_char buf ']';
end;
loop i (j + n) rest
| (Del, n) :: rest ->
begin
Buffer.add_string buf "[-";
Buffer.add_substring buf sa i n;
Buffer.add_char buf ']';
end;
loop (i + n) j rest
in
begin
try loop 0 0 chks with _ -> ()
end;
Buffer.contents buf
Test:
# print_endline ## diff 4
"le gros chat mange beaucoup de croquettes au saumon"
"le chat maigre mange peu de croquettes au saumon"
;;
le[- gros] chat[+ maigre] mange [+p][-b]e[-auco]u[-p] de ...
...umon

Related

How to count the number of possible solutions for a sudoku board using backtracking

This is a followup of:
Need some feedback on my fold_neighbours attempt on this problem in Ocaml
I got some really good advices and applied them when i implemented this version.
This is what i have right now, the program can actually read a sudoku and solve it. Here is a demo:
Input board
530070000
600195000
098000060
800060003
400803001
700020006
060000280
000419005
000080079
Solved board
534678912
672195348
198342567
859761423
426853791
713924856
961537284
287419635
345286179
Input board
480006902
002008001
900370060
840010200
003704100
001060049
020085007
700900600
609200018
Solved board
487156932
362498751
915372864
846519273
593724186
271863549
124685397
738941625
659237418
This is the code at the moment
type vertex = int * int
type gamma = int
module V = Map.Make(struct
type t = vertex
let compare = Stdlib.compare
end)
module G = Set.Make(struct
type t = gamma
let compare = compare end)
type pc = gamma V.t
(* Help methods for coloring *)
let color v pc = V.find v pc
let color_vertex v c pc = V.update v (fun _ -> Some c) pc
let is_uncolored v pc = color v pc = 0
let allcolors = (let acc = ref G.empty
in
for gamma = 1 to 9 do
acc := G.add gamma (!acc)
done;
!acc
)
(* Reads a textfile representing sudoku *)
let ascii_digit c = Char.code c - Char.code '0'
let read_matrix_from_channel chan =
let rec loop i j grid =
match input_char chan with
| exception End_of_file -> grid
| '\n' -> loop (i+1) 0 grid
| '0'..'9' as c ->
loop i (j+1) ##
V.add (i,j) (ascii_digit c) grid
| _ -> invalid_arg "invalid input" in
loop 0 0 V.empty
let matrix_from_file file =
let chan = open_in file in
let r = read_matrix_from_channel chan in
close_in chan;
r
(* Prints a pretty sudokuboard (not used in the demo) *)
let print_board vertex =
let print_node (_x,_y) _grid =
if _y = 0
then Printf.printf "\n | ";
print_int _grid;
print_string " | "
in
V.iter print_node vertex
(*executes a function on all vertices *)
let fold_vertices f acc =
let acc' = ref acc in
for i=0 to 8 do
for j=0 to 8 do
acc' := f (i,j) (!acc')
done
done;
!acc'
let find_vertex pred =
fold_vertices
(fun v a -> match a with
| None -> if pred v then Some v else None
| _ -> a) None
(* neighbours *)
let is_neighbour (i,j) (i',j') =
if (i,j) = (i',j') then
false
else if i = i' || j = j' then
true
else
(i/3) = (i'/3) && (j/3) = (j'/3)
(* fold_neighbours *)
let fold_neighbours f v acc =
let ff u acc =
if is_neighbour u v then f u acc else acc
in fold_vertices ff acc
(* Checks if vertex is allowed *)
let allowed v pc =
fold_neighbours (fun u allowed ->
match color u pc with
| 0 -> allowed
| c -> G.remove c allowed)
v allcolors
(* Solve *)
let rec solve pc =
match find_vertex (fun v -> is_uncolored v pc) with
| Some v ->
let all = allowed v pc in
G.fold (fun c -> function
| None -> color_vertex v c pc |> solve
| opc -> opc)
all None
| None -> Some pc
Ive got a problem now, not sure how to this. I want to count all the possible solutions for a sudoku using backtracking. So i want to just return the number of solutions as a integer.
I think i somehow need to recursively call the solve method in a "nsolve" method. Return None if there are no solution, else i check that the solution isnt the same as earlier, if its not i return Some solution and counts up else i return None.
Otherwise,maybe there is some way to get the approximate number of solutions. Because i guess that for an almost empty sudoku board it will take a very long time to recursively get the number of solutions. An empty sudoku board have 10^21 solutions, which is a lot. I don't know how to tackle this issue.
I there another way of doing this?
This is the method im trying to implement([nsolve pc]):
(** [nsolve pc] uses backtracking to count the number of possible solution to the sudoku. *)
let rec nsolve pc = .....
The simplest way to do this is by brute-forcing it. In pseudo code this would be
fun nb_sol cpt i j grid =
if i > 8 /\ j > 8 then cpt + 1
else if is_empty grid(i)(j) then
for k = 1 to 9 do
grid(i)(j) <- k
if is_ok grid then
res += (if j > 8 then nb_sol cpt (i+1) 0 grid
else nb_sol cpt i (j+1) grid)
done
grid(i)(j) <- empty
I didn't check if it was exactly what you wanted but you got the idea :-)

F# Performance: What is making this code so slow?

This F# code is an attempt to solve Project Euler problem #58:
let inc = function
| n -> n + 1
let is_prime = function
| 2 -> true
| n when n < 2 || n%2=0-> false
| n ->
[3..2..(int (sqrt (float n)))]
|> List.tryFind (fun i -> n%i=0)
|> Option.isNone
let spir = Seq.initInfinite (fun i ->
let n = i%4
let a = 2 * (i/4 + 1)
(a*n) + a + (a-1)*(a-1))
let rec accum se p n =
match se with
| x when p*10 < n && p <> 0 -> 2*(n/4) + 1
| x when is_prime (Seq.head x) -> accum (Seq.tail x) (inc p) (inc n)
| x -> accum (Seq.tail x) p (inc n)
| _ -> 0
printfn "%d" (accum spir 0 1)
I do not know the running time of this program because I refused to wait for it to finish. Instead, I wrote this code imperatively in C++:
#include "stdafx.h"
#include "math.h"
#include <iostream>
using namespace std;
int is_prime(int n)
{
if (n % 2 == 0) return 0;
for (int i = 3; i <= sqrt(n); i+=2)
{
if (n%i == 0)
{
return 0;
}
}
return 1;
}
int spir(int i)
{
int n = i % 4;
int a = 2 * (i / 4 + 1);
return (a*n) + a + ((a - 1)*(a - 1));
}
int main()
{
int n = 1, p = 0, i = 0;
cout << "start" << endl;
while (p*10 >= n || p == 0)
{
p += is_prime(spir(i));
n++; i++;
}
cout << 2*(i/4) + 1;
return 0;
}
The above code runs in less than 2 seconds and gets the correct answer.
What is making the F# code run so slowly? Even after using some of the profiling tools mentioned in an old Stackoverflow post, I still cannot figure out what expensive operations are happening.
Edit #1
With rmunn's post, I was able to come up with a different implementation that gets the answer in a little under 30 seconds:
let inc = function
| n -> n + 1
let is_prime = function
| 2 -> true
| n when n < 2 || n%2=0-> false
| n ->
[3..2..(int (sqrt (float n)))]
|> List.tryFind (fun i -> n%i=0)
|> Option.isNone
let spir2 =
List.unfold (fun state ->
let p = fst state
let i = snd state
let n = i%4
let a = 2 * (i/4 + 1)
let diag = (a*n) + a + (a-1)*(a-1)
if p*10 < (i+1) && p <> 0 then
printfn "%d" (2*((i+1)/4) + 1)
None
elif is_prime diag then
Some(diag, (inc p, inc i))
else Some(diag, (p, inc i))) (0, 0)
Edit #2
With FuleSnabel's informative post, his is_prime function makes the above code run in under a tenth of a second, making it faster than the C++ code:
let inc = function
| n -> n + 1
let is_prime = function
| 1 -> false
| 2 -> true
| v when v % 2 = 0 -> false
| v ->
let stop = v |> float |> sqrt |> int
let rec loop vv =
if vv <= stop then
if (v % vv) <> 0 then
loop (vv + 2)
else
false
else
true
loop 3
let spir2 =
List.unfold (fun state ->
let p = fst state
let i = snd state
let n = i%4
let a = 2 * (i/4 + 1)
let diag = (a*n) + a + (a-1)*(a-1)
if p*10 < (i+1) && p <> 0 then
printfn "%d" (2*((i+1)/4) + 1)
None
elif i <> 3 && is_prime diag then
Some(diag, (inc p, inc i))
else Some(diag, (p, inc i))) (0, 0)
There is no Seq.tail function in the core F# library (UPDATE: Yes there is, see comments), so I assume you're using the Seq.tail function from FSharpx.Collections. If you're using a different implementation of Seq.tail, it's probably similar -- and it's almost certainly the cause of your problems, because it's not O(1) like you think it is. Getting the tail of a List is O(1) because of how List is implemented (as a series of cons cells). But getting the tail of a Seq ends up creating a brand new Seq from the original enumerable, discarding one item from it, and returning the rest of its items. When you go through your accum loop a second time, you call Seq.tail on that "skip 1 then return" seq. So now you have a Seq which I'll call S2, which asks S1 for an IEnumerable, skips the first item of S1, and returns the rest of it. S1, when asked for its first item, asks S0 (the original Seq) for an enumerable, skips its first item, then returns the rest of it. So for S2 to skip two items, it had to create two seqs. Now on your next run through when you ask for the Seq.tail of S2, you create S3 that asks S2 for an IEnumerable, which asks S1 for an IEnumerable, which asks S0 for an IEnumerable... and so on. This is effectively O(N^2), when you thought you were writing an O(N) operation.
I'm afraid I don't have time right now to figure out a solution for you; using List.tail won't help since you need an infinite sequence. But perhaps just knowing about the Seq.tail gotcha is enough to get you started, so I'll post this answer now even though it's not complete.
If you need more help, comment on this answer and I'll come back to it when I have time -- but that might not be for several days, so hopefully others will also answer your question.
Writing performant F# is very possible but requires some knowledge of patterns that have high relative CPU cost in a tight loop. I recommend using tools like ILSpy to find hidden overhead.
For instance one could imagine F# exands this expression into an effective for loop:
[3..2..(int (sqrt (float n)))]
|> List.tryFind (fun i -> n%i=0)
|> Option.isNone
However it currently doesn't. Instead it creates a List that spans the range using intrinsic operators and passes that to List.tryFind. This is expensive when compared to the actual work we like to do (the modulus operation). ILSpy decompiles the code above into something like this:
public static bool is_prime(int _arg1)
{
switch (_arg1)
{
case 2:
return true;
default:
return _arg1 >= 2 && _arg1 % 2 != 0 && ListModule.TryFind<int>(new Program.Original.is_prime#10(_arg1), SeqModule.ToList<int>(Operators.CreateSequence<int>(Operators.OperatorIntrinsics.RangeInt32(3, 2, (int)Math.Sqrt((double)_arg1))))) == null;
}
}
These operators aren't as performant as they could be (AFAIK this is currently being improved) but no matter how effecient allocating a List and then search it won't beat a for loop.
This means the is_prime is not as effective as it could be. Instead one could do something like this:
let is_prime = function
| 1 -> false
| 2 -> true
| v when v % 2 = 0 -> false
| v ->
let stop = v |> float |> sqrt |> int
let rec loop vv =
if vv <= stop then
(v % vv) <> 0 && loop (vv + 2)
else
true
loop 3
This version of is_prime relies on tail call optimization in F# to expand the loop into an efficient for loop (you can see this using ILSpy). ILSpy decompile the loop into something like this:
while (vv <= stop)
{
if (_arg1 % vv == 0)
{
return false;
}
int arg_13_0 = _arg1;
int arg_11_0 = stop;
vv += 2;
stop = arg_11_0;
_arg1 = arg_13_0;
}
This loop doesn't allocate memory and is just a rather efficient loop. One see some non-sensical assignments but hopefully the JIT:er eliminate those. I am sure is_prime can be improved even further.
When using Seq in performant code one have to keep in mind it's lazy and it doesn't use memoization by default (see Seq.cache). Therefore one might easily end up doing the same work over and over again (see #rmunn answer).
In addition Seq isn't especially effective because of how IEnumerable/IEnumerator are designed. Better options are for instance Nessos Streams (available on nuget).
In case you are interested I did a quick implementation that relies on a simple Push Stream which seems decently performant:
// Receiver<'T> is a callback that receives a value.
// Returns true if it wants more values, false otherwise.
type Receiver<'T> = 'T -> bool
// Stream<'T> is function that accepts a Receiver<'T>
// This means Stream<'T> is a push stream (as opposed to Seq that uses pull)
type Stream<'T> = Receiver<'T> -> unit
// is_prime returns true if the input is prime, false otherwise
let is_prime = function
| 1 -> false
| 2 -> true
| v when v % 2 = 0 -> false
| v ->
let stop = v |> float |> sqrt |> int
let rec loop vv =
if vv <= stop then
(v % vv) <> 0 && loop (vv + 2)
else
true
loop 3
// tryFind looks for the first value in the input stream for f v = true.
// If found tryFind returns Some v, None otherwise
let tryFind f (s : Stream<'T>) : 'T option =
let res = ref None
s (fun v -> if f v then res := Some v; false else true)
!res
// diagonals generates a tuple stream of all diagonal values
// The first value is the side length, the second value is the diagonal value
let diagonals : Stream<int*int> =
fun r ->
let rec loop side v =
let step = side - 1
if r (side, v + 1*step) && r (side, v + 2*step) && r (side, v + 3*step) && r (side, v + 4*step) then
loop (side + 2) (v + 4*step)
if r (1, 1) then loop 3 1
// ratio computes the streaming ratio for f v = true
let ratio f (s : Stream<'T>) : Stream<float*'T> =
fun r ->
let inc r = r := !r + 1.
let acc = ref 0.
let count = ref 0.
s (fun v -> (inc count; if f v then inc acc); r (!acc/(!count), v))
let result =
diagonals
|> ratio (snd >> is_prime)
|> tryFind (fun (r, (_, v)) -> v > 1 && r < 0.1)

Use mutual recursion to solve a combinatorial game

It is a typical impartial game. Two players take turn to pick sticks which are labeled from 1 to n, and whenever No.1 stick gets picked the game is over. Rules are simple: PlayerA counts down to 5 and pick the correspondent stick; PlayerA counts up to 2 and pick the correspondent stick. The program is trying to find an initial point such that No.1 stick will be last picked.
I try to loop over the list and find which initial position will give the satisfying result, but it seems to returning value is not right. What's wrong with the codes??
fun play(stick) =
let
val stick_list = n_list(stick)
(*n_list(8) will generate an int list [1,2,3,4,5,6,7,8]*)
fun playerA(x::nil, n) = x
| playerA(stick_list, n) =
let
val pos = (n + (5 mod size(stick_list))) mod size(stick_list)
in
playerB(delete(stick_list, pos), pos)
end
and playerB(x::nil, n) = x
| playerB(stick_list, n) =
let
val pos = (n + (~2 mod size(stick_list))) mod size(stick_list)
in
playerA(delete(stick_list, pos), pos)
end
fun search(n) = if playerA(stick_list, n - 1) = 1 then n + 1 else search(n - 1)
in
search(stick - 1)
end;
Here are some suggestions:
n_list is easily implemented as List.tabulate (stick, fn i => i + 1).
Use length rather than size when working on lists.
Consider remembering the number of sticks left, so the length does not need to be recalculated. E.g.
fun play nSticks =
let val allSticks = List.tabulate (nSticks, fn i => i + 1)
fun playerA ([stick], _, n) = stick
| playerA (sticks, sticksLeft, n) =
let val pos = (n + (5 mod sticksLeft)) mod sticksLeft
in
playerB (delete (sticks, pos), sticksLeft - 1, pos)
end
...
Unless this is an exercise in mutual recursion, it seems that you can avoid repeating yourself a lot if you combine the two player functions into one, e.g. by
datatype Player = PlayerA | PlayerB
fun player (_, stick::_, 0) = stick
| player (player, sticks, sticksLeft, pos) = ...
let val (newPos, otherPlayer) =
case player of
PlayerA => ((n + 5) mod sticksLeft, PlayerB)
| PlayerB => ((sticksLeft + n - 2) mod sticksLeft, PlayerA)
val newSticks = delete (sticks, pos)
in player (otherPlayer, newSticks, sticksLeft - 1, newPos) end

Check list for consecutive values within range (F#)

In a sorted list of 10 numbers, I want to find out whether any 5 consecutive numbers are within a certain range. For reference: This is called finding a "stellium" (astronomical term, regarding positions of planets).
If the list is:
let iList = [15; 70; 72; 75; 80; 81; 120; 225; 250; 260]
I want a function
let hasStellium iStellSize iStellRange iList
that will return
hasStellium 5 20 iList = true
The list is already sorted, so I could just proceed with clunky if-then statements (like "Check whether element 1 and 5 are less than 20 units apart, check whether element 2 and 6 satisfy the condition" etc.
let hasStellium iStellSize iStellRange iList=
if
iList.[iStellSize-1] - iList.[0] < iStellRange ||
iList.[iStellSize] - iList.[1] < iStellRange
then true
else false
But there must be a more elegant way, that also allows for other stellium sizes without having to manually add if-then lines.
Thank you very much for your help!
(If the function could return the index number where the stellium starts, even better)
Just combining two standard library functions:
let hasStellium iStellSize iStellRange iList =
iList |> Seq.windowed iStellSize
|> Seq.tryFindIndex (fun s -> (s.[iStellSize - 1] - s.[0] < iStellRange))
returns either None if no such range can be found, otherwise Some x where x - range beginning index.
Here you go. It returns an int option which is the start index of the range, or None if not found.
let tryFindStelliumIndex iStellSize iStellRange iList =
let rec findRange i n = function
| _ when (n + 1) = iStellSize -> Some (i - n)
| prev::cur::tail when (cur - prev) < iStellRange -> findRange (i + 1) (n + 1) (cur::tail)
| _::tail -> findRange (i + 1) 0 tail
| _ -> None
findRange 0 0 iList
Another variant using Seq functions:
let hasStellium size range l =
Seq.zip l (l |> Seq.skip (size - 1))
|> Seq.tryFindIndex (fun p -> snd p - fst p < range)
Had to hack in an "early return" with a mutable variable, but here is a rough version
let mutable found = false
for i = 0 to (iList |> List.length - iStellSize) do
if iList.[i + iStellSize] - iList.[i] <= iStellRange then //Did you mean < or <=?
found <- true
found

Bell numbers in OCaml

I'm writing a program in OCaml which should calculate the first 100 Bell numbers.
Here is my code (I'm using the Num module):
open Num
let one = num_of_int 1;;
let zero = num_of_int 0;;
calculate factorial:
let rec factorial n =
if n < 2
then one
else (num_of_int n) */ factorial(n-1)
calculate Newton:
let rec newton n k =
factorial n // (factorial k */ factorial (n-k))
let bell = Array.make 101 zero;;
bell.(0) <- one;;
bell.(1) <- one;;
let i = ref 2
let k = ref 0
let x = ref zero
let suma = ref zero
let n = ref 100
if !n != 0 || !n != 1 then
while !i <= !n do
while !k <= (!i-1) do
x := newton (!i-1) !k;
suma := !suma +/ (!x */ bell.(!k));
k := !k + 1
done;
bell.(int_of_num !k) <- (!suma);
suma:= zero;
k:=0;
i:= !i + 1;
done;;
bell.(int_of_num 20)
This is my first program in this language. I have some problems with compiling it.
You miss a ;; at the end of the last let n = ref 100. But it is considered bad style anyway to use global variables as helper variables of an algorithm. The code below is a minimal fix of the code from the question and not an endorsement of other aspects of bad style, like not using for loops mentioned in comments.
(* #load "nums.cma";; if in toplevel *)
open Num
let one = num_of_int 1;;
let zero = num_of_int 0;;
let rec factorial n =
if n < 2
then one
else (num_of_int n) */ factorial(n-1)
let rec newton n k =
factorial n // (factorial k */ factorial (n-k))
let bell input =
let bell = Array.make (input+1) zero in
bell.(0) <- one;
bell.(1) <- one;
let i = ref 2 in
let k = ref 0 in
let x = ref zero in
let suma = ref zero in
let n = ref input in
if !n <> 0 || !n <> 1 then
while !i <= !n do
while !k <= (!i-1) do
x := newton (!i-1) !k;
suma := !suma +/ (!x */ bell.(!k));
k := !k + 1
done;
bell.(!k) <- (!suma);
suma:= zero;
k:=0;
i:= !i + 1;
done;
bell.(input)