ocaml stringOfList using concat - ocaml

The code I currently have is this where I'm testing stringOfList function
let rec sepConcat sep sl = match sl with
| [] -> ""
| h :: t ->
let f a x = a^sep^x in
let base = h in
let l = t in
List.fold_left f base l
let stringOfList f l = sepConcat "; " (List.map f l)
The output I'm supposed to get is
# stringOfList string_of_int [1;2;3;4;5;6];;
- : string = "[1; 2; 3; 4; 5; 6]"
but I'm getting
# stringOfList string_of_int [1;2;3;4;5;6];;
- : string = "1; 2; 3; 4; 5; 6"
What am I doing wrong? How can i add the extra [] inside the parentheses. Sepconcat is supposed to do this
# sepConcat ", " ["foo";"bar";"baz"];;
- : string = "foo, bar, baz"
# sepConcat "---" [];;
- : string = ""
# sepConcat "" ["a";"b";"c";"d";"e"];;
- : string = "abcde"
# sepConcat "X" ["hello"];;
- : string = "hello"

You're not doing anything wrong, there's just no place in the code that adds the square bracket characters.
I'd say that the square brackets are another punctuation thing like your sep parameter. You might want to pass them in as additional parameters at some level. Seems like it would be OK to add them as parameters to sepConcat--then it would handle all the punctuation.
Edit: it's not going to help to pass different values as sep. The separator goes between the strings. From your description, you want the extra brackets to go around the outside of the result. Not a separator.
Edit: The " characters are not part of the string. They are OCaml's way of showing you that the value is a string.
# let x = "a";;
val x : string = "a"
# x;;
- : string = "a"
# String.length x;;
- : int = 1
#
Any string value will have "" around it when printed by the interpreter. So there's no way the square brackets can be outside them!

Change your stringOfList to
let stringOfList f l =
let str = sepConcat "; " (List.map f l) in
"["^str^"]"
# stringOfList string_of_int [1;2;3;4;5];;
- : string = "[1; 2; 3; 4; 5]"

Related

how to extract words from a string with special characters

I am currently trying to do a spellcheck, but am having some trouble dealing with certain cases.
For example, given the string: { else"--but, }, my spellcheck automatically reads this as an invalid word. However, since else and but are both correctly spelled, I don't want to mark this as incorrect.
Is there any way I can do this with regular expressions?
A more common case I am having trouble with is things like "waistcoat-pocket".
Rather than a regular expression, you should use unicode word segmentation. With the uuseg and uucp library, you can extract words and filter word boundaries with
let is_alphaword =
let alphachar = function
| `Malformed _ -> false
| `Uchar x ->
match Uucp.Break.word x with
| `LE | `Extend -> true
| _ -> false
in
Uutf.String.fold_utf_8 (fun acc _ x -> acc && alphachar x) true
(* Note that we are supposing strings to be utf-8 encoded *)
let words s =
let cons l x = if is_alphaword x then x :: l else l in
List.rev (Uuseg_string.fold_utf_8 `Word cons [] s)
This function splits the string words-by-words:
words "else\"--but";;
- : string list = ["else"; "but"]
words "waistcoat-pocket";;
- : string list = ["waistcoat"; "pocket"]
and works correctly in more general context
words "आ तवेता नि षीदतेन्द्रमभि पर गायत";;
- : string list =
["आ"; "तवेता"; "नि"; "षीदतेन्द्रमभि";
"पर"; "गायत"]
or
words "Étoile(de Barnard)";;
- : string list = ["Étoile"; "de"; "Barnard"]

How do you split a string into lists unless it is inside quotation marks ("") in Ocaml?

I'm reading an input file of several lines. Each line has the following format:
Greeting "hello"
Greeting " Good morning"
Sit
Smile
Question "How are you?"
My current can read each line into a string list. Then I process it using this function which is supposed to break it into a string list list:
let rec process (l : string list) (acc : string list list) : string list list =
match l with
| [] -> acc
| hd :: tl -> String.split_on_char ' ' hd :: (process tl acc)
Which, unfortunately, does not work, since it also splits spaces inside quotation marks. Anyone think of a the right way to do this, possibly using map or fold_left, etc? This would be my expected output:
[["Greeting"; "/"hello/""];[Greeting; "/" Good morning"];["Sit"]]
and so on. Thank you!
You want a real (but very simple) lexical analysis. IMHO this is beyond what you can do with simple string splitting.
A scanner takes a stream of characters and returns the next token it sees. You can make a string into a stream by having an index that traverses the string.
Here is a scanner that is roughly what you would want:
let rec scan s offset =
let slen = String.length s in
if offset >= slen then
None
else if s.[offset] = ' ' then
scan s (offset + 1)
else if s.[offset] = '"' then
let rec qlook loff =
if loff >= slen then
(* Unterminated quotation *)
let tok = String.sub s offset (slen - offset) in
Some (tok, slen)
else if s.[loff] = '"' then
let tok = String.sub s offset (loff - offset + 1) in
Some (tok, loff + 1)
else qlook (loff + 1)
in
qlook (offset + 1)
else
let rec wlook loff =
if loff >= slen then
let tok = String.sub s offset (slen - offset) in
Some (tok, slen)
else if s.[loff] = ' ' || s.[loff] = '"' then
let tok = String.sub s offset (loff - offset) in
Some (tok, loff)
else
wlook (loff + 1)
in
wlook (offset + 1)
It handles a few cases that you didn't specify: what to do if there is an unclosed quotation. What to do with something like abc"def ghi".
The scanner returns None at the end of the string, or Some (token, offset), i.e., the next token and the offset to continue scanning.
A recursive function to break up a string would look something like this:
let split s =
let rec isplit accum offset =
match scan s offset with
| None -> List.rev accum
| Some (tok, offset') -> isplit (tok :: accum) offset'
in
isplit [] 0
This can be visualized with a state machine. You have 2 main states: looking for ' ' and looking for '"'. Processing strings is ugly and you can't pattern match it. So first thing I did is turn the string into a char list. Implementing the two states then becomes simple:
let split s =
let rec split_space acc word = function
| [] -> List.rev (List.rev word::acc)
| ' '::xs -> split_space (List.rev word::acc) [] xs
| '"'::xs -> find_quote acc ('"'::word) xs
| x::xs -> split_space acc (x::word) xs
and find_quote acc word = function
| [] -> List.rev (List.rev word::acc)
| '"'::xs -> split_space acc ('"'::word) xs
| x::xs -> find_quote acc (x::word) xs
in
split_space [] [] s
;;
# split ['a';'b';' ';'"';'c';' ';'d';'"';' ';'e'];;
- : char list list = [['a'; 'b']; ['"'; 'c'; ' '; 'd'; '"']; ['e']]
Now if you want to do it with strings that's left to you. The Idea would be the same. Or you can just turn the char list list into a string list at the end.

Trying to get first word from character list

I have a character list [#"h", #"i", #" ", #"h", #"i"] which I want to get the first word from this (the first character sequence before each space).
I've written a function which gives me this warning:
stdIn:13.1-13.42 Warning: type vars not generalized because of value
restriction are instantiated to dummy types (X1,X2,...)
Here is my code:
fun next [] = ([], [])
| next (hd::tl) = if(not(ord(hd) >= 97 andalso ord(hd) <= 122)) then ([], (hd::tl))
else
let
fun getword [] = [] | getword (hd::tl) = if(ord(hd) >= 97 andalso ord(hd) <= 122) then [hd]#getword tl else [];
in
next (getword (hd::tl))
end;
EDIT:
Expected input and output
next [#"h", #"i", #" ", #"h", #"i"] => ([#"h", #"i"], [#" ", #"h", #"i"])
Can anybody help me with this solution? Thanks!
This functionality already exists within the standard library:
val nexts = String.tokens Char.isSpace
val nexts_test = nexts "hi hi hi" = ["hi", "hi", "hi"]
But if you were to build such a function anyway, it seems that you return ([], []) sometimes and a single list at other times. Normally in a recursive function, you can build the result by doing e.g. c :: recursive_f cs, but this is assuming your function returns a single list. If, instead, it returns a tuple, you suddenly have to unpack this tuple using e.g. pattern matching in a let-expression:
let val (x, y) = recursive_f cs
in (c :: x, y + ...) end
Or you could use an extra argument inside a helper function (since the extra argument would change the type of the function) to store the word you're extracting, instead. A consequence of doing that is that you end up with the word in reverse and have to reverse it back when you're done recursing.
fun isLegal c = ord c >= 97 andalso ord c <= 122 (* Only lowercase ASCII letters *)
(* But why not use one of the following:
fun isLegal c = Char.isAlpha c
fun isLegal c = not (Char.isSpace c) *)
fun next input =
let fun extract (c::cs) word =
if isLegal c
then extract cs (c::word)
else (rev word, c::cs)
| extract [] word = (rev word, [])
in extract input [] end
val next_test_1 =
let val (w, r) = next (explode "hello world")
in (implode w, implode r) = ("hello", " world")
end
val next_test_2 = next [] = ([], [])

Ocaml nested functions

Can someone explain the syntax used for when you have nested functions?
For example I have a outer and an inner recursive function.
let rec func1 list = match list with
[] -> []
|(head::tail) ->
let rec func2 list2 = match list2 with
...
;;
I have spent all day trying to figure this out and I'm getting a ever tiring "Syntax error".
You don't show enough code for the error to be obvious.
Here is a working example:
# let f x =
let g y = y * 5 in
g (x + 1);;
val f : int -> int = <fun>
# f 14;;
- : int = 75
Update
Something that might help until you're used to OCaml syntax is to use lots of extra parentheses:
let rec f y x =
match x with
| h :: t -> (
let incr v = if h = y then 1 + v else v in
incr (f y t)
)
| _ -> (
0
)
It's particularly hard to nest one match inside another without doing this sort of thing. This may be your actual problem rather than nested functions.

string to list of char

I want to write a function that taking a string and return a list of char. Here is a function, but I think it is not do what I want ( I want to take a string and return a list of characters).
let rec string_to_char_list s =
match s with
| "" -> []
| n -> string_to_char_list n
Aside, but very important:
Your code is obviously wrong because you have a recursive call for which all the parameters are the exact same one you got in. It is going to induce an infinite sequence of calls with the same values in, thus looping forever (a stack overflow won't happen in tail-rec position).
The code that does what you want would be:
let explode s =
let rec exp i l =
if i < 0 then l else exp (i - 1) (s.[i] :: l) in
exp (String.length s - 1) []
Source:
http://caml.inria.fr/pub/old_caml_site/FAQ/FAQ_EXPERT-eng.html#strings
Alternatively, you can choose to use a library: batteries String.to_list or extlib String.explode
Try this:
let explode s = List.init (String.length s) (String.get s)
Nice and simple:
let rec list_car ch =
match ch with
| "" -> []
| ch -> String.get ch 0 :: list_car (String.sub ch 1 (String.length ch - 1));;
How about something like this:
let string_to_list str =
let rec loop i limit =
if i = limit then []
else (String.get str i) :: (loop (i + 1) limit)
in
loop 0 (String.length str);;
let list_to_string s =
let rec loop s n =
match s with
[] -> String.make n '?'
| car :: cdr ->
let result = loop cdr (n + 1) in
String.set result n car;
result
in
loop s 0;;
As of OCaml 4.07 (released 2018), this can be straightforwardly accomplished with sequences.
let string_to_char_list s =
s |> String.to_seq |> List.of_seq
Here is an Iterative version to get a char list from a string:
let string_to_list s =
let l = ref [] in
for i = 0 to String.length s - 1 do
l := (!l) # [s.[i]]
done;
!l;;
My code, suitable for modern OCaml:
let charlist_of_string s =
let rec trav l i =
if i = l then [] else s.[i]::trav l (i+1)
in
trav (String.length s) 0;;
let rec string_of_charlist l =
match l with
[] -> ""
| h::t -> String.make 1 h ^ string_of_charlist t;;