OCaml serializing a (no args) variant as a "string enum" (via Yojson) - ocaml

Say I am building a record type:
type thing {
fruit: string;
}
But I want the possible values of fruit to be constrained to a fixed set of strings.
It seems natural to model this in OCaml as a variant, e.g.:
type fruit = APPLE | BANANA | CHERRY
type thing {
fruit: fruit;
}
Okay so far.
But if I use [##deriving yojson] on these types then the serialized output will be like:
{ "fruit": ["APPLE"] }
By default Yojson wants to serialize a variant as a tuple of [<name>, <args>...] which... I can see the logic of it, but it is not helpful here.
I want it to serialize as:
{ "fruit": "APPLE" }
Making use of a couple of ppx deriving plugins I managed to build this module to de/serialize as I want:
module Fruit = struct
type t = APPLE | BANANA | CHERRY [##deriving enum, variants]
let names =
let pairs i (name, _) = (name, (Option.get (of_enum i))) in
let valist = List.mapi pairs Variants.descriptions in
List.to_seq valist |> Hashtbl.of_seq
let to_yojson v = `String (Variants.to_name v)
let of_yojson = function
| `String s -> Hashtbl.find_opt names s
|> Option.to_result ~none:(Printf.sprintf "Invalid value: %s" s)
| yj -> Error (Printf.sprintf "Invalid value: %s" (Yojson.Safe.to_string yj))
end
Which works fine... but I have some other "string enum" variants I want to treat the same way. I don't want to copy and paste this code every time.
I got as far as this:
module StrEnum (
V : sig
type t
val of_enum : int -> t option
module Variants : sig
val descriptions : (string * int) list
val to_name : t -> string
end
end
) = struct
type t = V.t
let names =
let pairs i (name, _) = (name, (Option.get (V.of_enum i))) in
let valist = List.mapi pairs V.Variants.descriptions in
List.to_seq valist |> Hashtbl.of_seq
let to_yojson v = `String (V.Variants.to_name v)
let of_yojson = function
| `String s -> Hashtbl.find_opt names s
|> Option.to_result ~none:(Printf.sprintf "Invalid StrEnum value: %s" s)
| yj -> Error (Printf.sprintf "Invalid StrEnum value: %s" (Yojson.Safe.to_string yj))
end
module Fruit = struct
type t = APPLE | BANANA | CHERRY [##deriving enum, variants]
end
module FruitEnum = StrEnum (Fruit)
That much seems to type-check, and I can:
utop # Yojson.Safe.to_string (FruitEnum.to_yojson Fruit.APPLE);;
- : string = "\"APPLE\""
utop # FruitEnum.of_yojson (Yojson.Safe.from_string "\"BANANA\"");;
- : (FruitEnum.t, string) result = Ok Fruit.BANANA
...but when I try to:
type thing {
fruit: FruitEnum.t;
}
[##deriving yojson]
I get Error: Unbound value FruitEnum.t
It seems to be because I am re-exporting type t = V.t from the variant's module, I don't really understand though. (Or is it because the yojson ppx can't "see" the result of the functor properly?)
How can I fix this?
I would also like to be able to skip defining the variant module separately and just do:
module Fruit = StrEnum (struct
type t = APPLE | BANANA | CHERRY [##deriving enum, variants]
end)
...but this gives the error:
Error: This functor has type
functor
(V : sig
type t
val of_enum : int -> t option
module Variants :
sig
val descriptions : (string * int) list
val to_name : t -> string
end
end)
->
sig
type t = V.t
val names : (string, t) Hashtbl.t
val to_yojson : t -> [> `String of string ]
val of_yojson : Yojson.Safe.t -> (t, string) result
end
The parameter cannot be eliminated in the result type.
Please bind the argument to a module identifier.
and I don't understand what is wrong.

Regarding the last error, it's because OCaml requires a 'stable path' to types inside modules so it can refer to them. A stable path is a named path to a type, e.g. Fruit.t.
By contrast, StrEnum(struct type t = ... end).t is not a stable path because the type t is referencing a type t in the module literal which does not have a name.
Long story short, you basically can't skip defining the variant module separately. But it's simple to do it in two steps:
module Fruit = struct
type t = ...
end
module Fruit = StrEnum(Fruit)
The second definition refers to the first and shadows it. Shadowing is a well-known and often-used technique in OCaml.
Overall, I'm not sure all this PPX machinery is actually justified. You can pretty easily hand-write converter functions, e.g.
let to_yojson = function
| APPLE -> `String "APPLE"
| BANANA -> `String "BANANA"
| CHERRY -> `String "CHERRY"

Well, I was curious to have a go at writing a PPX deriver to perform this transformation.
Here's what I ended up with:
open Ppxlib
module List = ListLabels
let make_methods ~(loc : location) ~(is_poly : bool) (constructors : constructor_declaration list) =
let (module Ast) = Ast_builder.make loc in
let v_patt = match is_poly with
| true -> fun name -> Ast.ppat_variant name None
| false -> fun name -> Ast.ppat_construct { txt = (Lident name); loc } None
and v_expr = match is_poly with
| true -> fun name -> Ast.pexp_variant name None
| false -> fun name -> Ast.pexp_construct { txt = (Lident name); loc } None
in
let (to_cases, of_cases) =
List.map constructors ~f:(
fun cd ->
let name = cd.pcd_name.txt in
let to_case = {
pc_lhs = v_patt name;
pc_guard = None;
pc_rhs = [%expr `String [%e Ast.estring name] ];
} in
let of_case = {
pc_lhs = Ast.ppat_variant "String" (Some (Ast.pstring name));
pc_guard = None;
pc_rhs = [%expr Ok ([%e v_expr name]) ];
} in
(to_case, of_case)
)
|> List.split
in
let of_default_case = {
pc_lhs = [%pat? yj ];
pc_guard = None;
pc_rhs = [%expr Error (Printf.sprintf "Invalid value: %s" (Yojson.Safe.to_string yj)) ];
} in
let of_cases = of_cases # [of_default_case] in
let to_yojson = [%stri let to_yojson = [%e Ast.pexp_function to_cases]] in
let of_yojson = [%stri let of_yojson = [%e Ast.pexp_function of_cases] ] in
[to_yojson; of_yojson]
let type_impl ~(loc : location) (td : type_declaration) =
match td with
| {ptype_kind = (Ptype_abstract | Ptype_record _ | Ptype_open); _} ->
Location.raise_errorf ~loc "Cannot derive yojson_str_enum for non variant types"
| {ptype_kind = Ptype_variant constructors; _} -> begin
let invalid_constructors =
List.filter_map constructors ~f:(
fun cd -> match cd.pcd_args with
| (Pcstr_tuple [] | Pcstr_record []) -> None
| _ -> Some (cd)
)
in
if (List.length invalid_constructors) > 0 then
Location.raise_errorf ~loc "Cannot derive yojson_str_enum for variant types with constructor args";
match is_polymorphic_variant td ~sig_:false with
| `Definitely | `Maybe -> make_methods ~loc ~is_poly:true constructors
| `Surely_not -> make_methods ~loc ~is_poly:false constructors
end
let generate_impl ~ctxt (_rec_flag, type_declarations) =
(* [loc] is "location", not "lines of code" *)
let loc = Expansion_context.Deriver.derived_item_loc ctxt in
List.map type_declarations ~f:(type_impl ~loc)
|> List.concat
let yojson_str_enum =
Deriving.add
"yojson_str_enum"
~str_type_decl:(Deriving.Generator.V2.make_noarg generate_impl)
to make usable it needs a dune file something like:
(library
(kind ppx_rewriter)
(name <lib name>)
(preprocess (pps ppxlib.metaquot))
(libraries yojson ppxlib))
After adding <lib name> to the pps in your dune file, usage is like:
module Fruit = struct
type t = APPLE | BANANA | CHERRY [##deriving yojson_str_enum]
end
It seems to work fine for my use case. It might be extended per the comment by #Yawar to take args allowing to specify to/from string transform functions for the variant labels. But I was happy just with Fruit.APPLE -> "APPLE" for now. I should also implement the sig_type_decl version.
One part I am a bit uncertain about is this:
match is_polymorphic_variant td ~sig_:false with
| `Definitely | `Maybe -> make_methods ~loc ~is_poly:true constructors
| `Surely_not -> make_methods ~loc ~is_poly:false constructors
I am not very clear when the `Maybe case occurs or how it should most correctly be handled, or if there is a better way of detecting "backtick variants" than using the is_polymorphic_variant method from ppxlib.

Related

Is it possible to have elements of a heterogeneous list depend on the type of preceding elements?

Context
I want to model the routes of a web application such that it satisfies the following requirements:
Can enforce complete definitions
Can create incomplete definitions
Can check if an incomplete definition "matches" (i.e. is contained in) a complete definition.
As an example to work with:
type root =
| Fruit of fruit
| Veggie of veggie
and fruit =
| Apple of apple
| Banana of banana
and veggie =
| Carrot of carrot
and apple = { diameter: int; cultivar: string; }
and banana = { length: int }
and carrot = { length: int; color: [`Orange | `Purple] }
With this, we can easily create and enforce complete definitions:
let complete = Fruit (Apple { diameter = 8; cultivar = "Golden Delicious" })
But cannot create incomplete definitions
let incomplete = Fruit Apple
^^^^^
Error: The constructor Apple expects 1 argument(s),
but is applied here to 0 argument(s)
And therefore also cannot match incomplete and complete definitions, but we can at least verbosely implement a partial equality function that ignores the parameters:
let equal a b = match a, b with
| Fruit (Apple _), Fruit (Apple _ ) -> true
| Fruit (Apple _), _ -> false
| Fruit (Banana _), Fruit (Banana _ ) -> true
| Fruit (Banana _), _ -> false
| Veggie (Carrot _) , Veggie (Carrot _) -> true
| Veggie (Carrot _) , _ -> false
A vague idea towards a possible solution
So I had this idea of using a tree of GADTs and a heterogeneous list to make the definitions more flexible by having the routes defined as lists, e.g.:
let route = [Fruit; Apple; { diameter = 6; cultivar = "Granny Smith" }]
they could then be used with pattern matching and recursion to more easily destructure and compare them.
Unfortunately implementing this isn't quite so easy. This is what I have so far:
type _ root =
| Fruit : _ fruit root
| Veggie : _ veggie root
and _ fruit =
| Apple : apple fruit
| Banana : banana fruit
and _ veggie =
| Carrot : carrot veggie
and apple = { diameter: int; cultivar: string; }
and banana = { length: int }
and carrot = { length: int; color: [`Orange | `Purple] }
type 'a t =
| [] : _ root t
| ( :: ) : 'b * 'a t -> 'b t
Two problems that I see here:
'b isn't constrained by 'a, so anything can be put into the list, as long as it starts with a root t, and there probably isn't a way to recover the type of the elements either. I think this would require Higher-Kinded Types, but maybe there's a way around that?
Even if I was able to solve that, I'm not sure how I'd be able to terminate it. Perhaps the params could be made into GADTs too, and terminate with unit.
It is possible to have heterogeneous lists for which the type of each element depends on the previous element and impose the constraint on the following type. The core idea is to realize that each element need to define in which context it is allowed and which context, and it is then a matter of chaining matching context:
type ('a,'b) t =
| [] : ('a,'a) t
| ( :: ) : ('a, 'b) element * ('b,'c) t -> ('a,'c) t
Here the type ('a,'b) t describes a heterogeneous list which start at the context type 'a and stop at the context type 'b. And it the type definition of ('a,'b) element which determine which transitions is allowed.
In your case, the element type could be defined as something like
module Tag = struct
type final = Done
type root = Root
type fruit = Fruit
type veggie = Veggie
end
type (_,_) element=
| Fruit : (Tag.root, Tag.fruit) element
| Veggie : (Tag.root, Tag.veggie) element
| Apple : (Tag.fruit, apple) element
| Banana : (Tag.fruit, banana) element
| Carrot: (Tag.veggie, carrot) element
| End: 'a -> ('a, Tag.final) element
It is important to notice that the module Tag only provides type level tags(indices) that are not associated to any value.
With this definition:
let fruit = [Fruit]
is a (Tag.root,Tag.fruit) element: the element is only allowed to the top and requires that the following element is allowed in the Tag.fruit context. A valid next element would then be
let apple = [Fruit;Apple]
which is a (Tag.root,Tag.apple) t path.
Finally, it is possible to close a path with the End constructor once we are in a context that maps to concrete type:
type complete = (Tag.root,Tag.final) t
let full_apple : complete =
[Fruit; Apple; End { diameter=0; cultivar="apple"}]
And this construction is still static enough that it is generally possible to recover enough type information for handling partial paths at the price of some redundancy:
let rec prefix: type a b c d. (a,b) t -> (c,d) t -> bool = fun pre x ->
match pre, x with
| [], _ -> true
| Fruit :: q, Fruit :: r -> prefix q r
| Veggie :: q, Veggie :: r -> prefix q r
| [Apple], Apple :: r -> true
| [Banana], Banana :: r -> true
| [Carrot], Carrot :: r -> true
| [Apple; End x], [Apple; End y] -> x = y
| [Banana; End x], [Banana; End y] -> x = y
| [Carrot; End x], [Carrot; End y] -> x = y
| _ -> false

OCaml's GADT as parameter for level of execution

I am trying to write a function run taking a parameter to parametrize its level of execution. I want this function to return its output after a given level. I used GADTs to have the output of run depends on its input.
Here is the code:
type _ level_output =
| FirstO : int -> int level_output
| SecondO : float -> float level_output
| ThirdO : string -> string level_output
type _ run_level_g =
| First : int run_level_g
| Second : float run_level_g
| Third : string run_level_g
type run_level = Any : 'a run_level_g -> run_level
let first _ =
(*do stuff*)
1
let second _ =
(*do stuff*)
2.5
let third _ =
(*do stuff*)
"third"
let run1 (type a) (level:a run_level_g) data : a level_output =
let out = first data in
match level with
| First -> FirstO out
| Second ->
let out = second out in
SecondO out
| Third ->
let out = second out in
let out = third out in
ThirdO out
let run2 (type a) (level:a run_level_g) data : a level_output =
let out = first data in
if Any level = Any First
then FirstO out
else
let out = second out in
if Any level = Any Second
then SecondO out
else
let out = third out in
ThirdO out
type (_,_) eq = Eq : ('a,'a) eq
let eq_level (type a b) (x:a run_level_g) (y:b run_level_g) : (a, b) eq option =
match x, y with
| First, First -> Some Eq
| Second, Second -> Some Eq
| Third, Third -> Some Eq
| _ -> None
let cast_output (type a b) (Eq:(a, b) eq) (v:a level_output) : b level_output = v
let run3 (type a) (level:a run_level_g) data : a level_output =
let out = first data in
let eq = eq_level First level in
match eq with
| Some eq -> cast_output eq (FirstO out)
| None ->
let out = second out in
let eq = eq_level Second level in
match eq with
| Some eq -> cast_output eq (SecondO out)
| None ->
let out = third out in
let eq = eq_level Third level in
match eq with
| Some eq -> cast_output eq (ThirdO out)
| None -> failwith "this can't happen"
There are three versions of run. The first one works well but there is code duplication, which I would like to remove. I would like my function to look more like run2 but this one does not compile because the type checker can't infer the type from the if-condition. An answer to that problem is run3 but now I have this clunky failwith case that obviously can't happen.
I was wondering if there was a way for me to have the best of both worlds, a function with no code duplication and no failwith case?
I find your function run1 the most readable one, by far.
One possibility to remove some code duplication may be to make run1 recursive.
First, one can define a short helper function to extract data from level_output
let proj (type a) (x:a level_output): a =
match x with
| FirstO x -> x
| SecondO x -> x
| ThirdO x -> x;;
then a recursive variant of run may be written as
let rec run: type a. a run_level_g -> 'b -> a level_output =
fun level data -> match level with
| First -> FirstO(first data)
| Second -> SecondO(second ## proj ## run First data)
| Third -> ThirdO(third ## proj ## run Second data);;

Java GuardTypes analogy for OCaml

How do you do, Stackoverflow!
In Java practice there are some issues concerning partially defined functions. Sometimes it's convinient to separate an error handling from the calculation itself. We may utilize an approach called "Guard types" or "Guard decorators".
Consider the simple synthetic example: to guard the null reference. This can be done with the aid of the next class
public class NonNull<T> {
public take() {
return null != this.ref ? this.ref : throw new ExcptionOfMine("message");
}
public NotNull(T ref_) {
this.ref = ref_;
}
private T ref;
}
The question is:
Is there a way to implement the same "Guard type" in OCaml without touching its object model? I believe for the OCaml as the functional programming language to possess enough abstraction methods without objec-oriented technics.
You can use an abstract type to get the same effect. OCaml has no problem with null pointers. So say instead you want to represent a nonempty list in the same way as above. I.e., you want to be able to create values that are empty, but only complain when the person tries to access the value.
module G :
sig type 'a t
val make : 'a list -> 'a t
val take : 'a t -> 'a list
end =
struct
type 'a t = 'a list
let make x = x
let take x = if x = [] then raise (Invalid_argument "take") else x
end
Here's how it looks when you use the module:
$ ocaml
OCaml version 4.02.1
# #use "m.ml";;
module G :
sig type 'a t val make : 'a list -> 'a t val take : 'a t -> 'a list end
# let x = G.make [4];;
val x : int G.t = <abstr>
# G.take x;;
- : int list = [4]
# let y = G.make [];;
val y : '_a G.t = <abstr>
# G.take y;;
Exception: Invalid_argument "take".
There's a concept of Optional types, on which you can effectively pattern match. Example:
let optional = Some 20
let value =
match optional with
| Some v -> v
| None -> 0
You can use simple closures
let guard_list v =
fun () ->
if v = [] then failwith "Empty list"
else v
let () =
let a = guard_list [1;2;3] in
let b = guard_list [] in
print_int (List.length (a ())); (* prints 3 *)
print_int (List.length (b ())) (* throws Failure "Empty list" *)
or lazy values
let guard_string v = lazy begin
if v = "" then failwith "Empty string"
else v
end
let () =
let a = guard_string "Foo" in
let b = guard_string "" in
print_endline (Lazy.force a); (* prints "Foo" *)
print_endline (Lazy.force b) (* throws Failure "Empty string" *)

How to pattern match on a GADT in Bigarray?

How do we pattern match on a GADT? In this case, I'm having trouble with a GADT of Bigarray. More specifically, the code
let print_layout v = match Bigarray.Genarray.layout v with
| Bigarray.C_layout -> Printf.printf "C layout\n"
| Bigarray.Fortran_layout -> Printf.printf "Fortran layout\n"
fails to compile with the error message
Error: This pattern matches values of type
Bigarray.fortran_layout Bigarray.layout
but a pattern was expected which matches values of type
Bigarray.c_layout Bigarray.layout
Type Bigarray.fortran_layout is not compatible with type
Bigarray.c_layout
It's complaining about the Bigarray.Fortran_layout case. If we look at Bigarray we see that
type c_layout = C_layout_typ
type fortran_layout = Fortran_layout_typ
type 'a layout =
C_layout : c_layout layout
| Fortran_layout : fortran_layout layout
So, it's a GADT and I'm doing something wrong with the pattern matching. What's a working version of print_layout?
With gadts, you often need to add annotations when doing a generic pattern matching (one that should work on all the constructors of a type).
Here is the correct way to do what you want:
let print_layout (type t) (v: (_,_,t) Bigarray.Genarray.t) =
match Bigarray.Genarray.layout v with
| Bigarray.C_layout -> Printf.printf "C layout\n"
| Bigarray.Fortran_layout -> Printf.printf "Fortran layout\n"
The annotation introduces an abstract type t that will be the layout type. By pattern matching on the layout, you discover which layout type it is actually equal to.
Here is a function that returns a string representing a layout:
let layout_str : type l. l Bigarray.layout -> string = function
| Bigarray.C_layout -> "C layout"
| Bigarray.Fortran_layout -> "Fortran layout"
You can use this to define your desired function
let print_layout v =
Printf.printf "%s\n" (layout_str (Bigarray.Genarray.layout v))
It works for me:
$ ocaml
OCaml version 4.02.1
# #load "bigarray.cma";;
# open Bigarray;;
# let fv = Genarray.create int32 Fortran_layout [| 0; 1; 2 |];;
val fv :
(int32, Bigarray.int32_elt, Bigarray.fortran_layout) Bigarray.Genarray.t =
<abstr>
# let cv = Genarray.create int32 C_layout [| 0; 1; 2 |];;
val cv : (int32, Bigarray.int32_elt, Bigarray.c_layout) Bigarray.Genarray.t =
<abstr>
# let layout_str : type l. l Bigarray.layout -> string = function
| Bigarray.C_layout -> "C layout"
| Bigarray.Fortran_layout -> "Fortran layout";;
val layout_str : 'l Bigarray.layout -> string = <fun>
# let print_layout v =
Printf.printf "%s\n" (layout_str (Bigarray.Genarray.layout v));;
val print_layout : ('a, 'b, 'c) Bigarray.Genarray.t -> unit = <fun>
# print_layout fv;;
Fortran layout
- : unit = ()
# print_layout cv;;
C layout
- : unit = ()
#

How to check if head is list in F#

Is there a function to check if an object is a list?
I did it like this:
try
let x = unbox<list<obj>>(l)
....
with
| _ -> ...
But i would like to check it with an if or match instead if it is possible.
I'd use:
let isList o =
let ty = o.GetType()
if (ty = typeof<obj>) then false
else
let baseT = ty.BaseType
baseT.IsGenericType && baseT.GetGenericTypeDefinition() = typedefof<_ list>
if (isList o) then
...
This will identify lists containing any type of item (int lists, string lists, etc.).
In case you know type of list elements you can use such code:
let is_list (x: obj) =
match x with
| :? list<int> -> printfn "int list"
| :? list<string> -> printfn "string list"
| _ -> printfn "unknown object"