(* We can use variant types to better cope with the homogeneity of CaML's polymorphic type system: *) [1;2.0;3.5;4];; (* bad! different types of elements in list *) type number = f of float | i of int;; [i 1;f 2.0;f 3.5;i 4];; let rec sumlist = function [] -> 0.0 | ((f x)::t) -> x +. sumlist(t) | ((i x)::t) -> float_of_int(x) +. sumlist(t);; sumlist [i 1;f 2.0;f 3.5;i 4];; (* User-defined datatypes can be recursive: *) type tree = leaf of int | node of tree * tree;; let rec sumtree = function (leaf n) -> n | (node(t1,t2)) -> sumtree(t1) + sumtree(t2);; let t = node(leaf(3),node(leaf(2),leaf(5)));; sumtree t;; (* User-defined types can also be parametrically polymorphic: *) type 'a tree = leaf of 'a | node of ('a tree) * ('a tree);; leaf(2);; node(leaf("ab"),leaf("cd"));; node(leaf(2),leaf("cd"));; let rec count = function (leaf _) -> 1 | (node (t1,t2)) -> count t1 + count t2;; (* ENUMERATED TYPES: consist of type constructors with no arguments *) type day = mon | tue | wed | thu | fri;; (* AUXILIARY FUNCTIONS: functions used solely to help define another function. They can typically be thought of as generalizations of the functions they're helping, and they often have extra parameters that are used to store intermediate results - almost like a local variable. *) let rec sum_list = function [] -> 0 | x::xs -> x+sum_list(xs);; (* OK, but we can do better. This one is not tail-recursive. Better: *) let sum_list(l) = let rec sum_list_aux accum = function [] -> accum | x::xs -> sum_list_aux (accum+x) xs in sum_list_aux 0 l;; sum_list([1;2;3;4]);; (* The accumulator maintains a partial sum as we traverse the list. Furthermore, the addition is called as an argument to sum_list_aux, so this is tail-recursive (notice that sum_list itself is no longer even recursive). Both are correct - all that is at stake here is efficiency. In the case of reverse, the accumulator actually brings the algorithm down into linear time. The time savings can be quite significant. Another example - adding together the last three elements of a given list: *) let threesum(l) = nth(reverse(l),0) + nth(reverse(l),1) + nth(reverse(l),2);; (* reverse and nth are functions that you must write in your assignment *) threesum([2;3;4;5;6]); OR let threesum(l) = let r = reverse(l) in nth(r,0) + nth(r,1) + nth(r,2);; OR fun threesum(l) = let threesum_aux(r) = nth(r,0) + nth(r,1) + nth(r,2) in threesum_aux(reverse(l));; (* Notice that both of the last two solutions abstract the work of the first one into: (fun r -> nth(r,0) + nth(r,1) + nth(r,2))(reverse(l)) but they differ on which part gets thrown into the let statement. In fact, we could also define the function in exactly this way, since we have a direct way (lambda terms or anonymous functions) of handling functions: *) let threesum(l) = (fun r -> nth(r,0) + nth(r,1) + nth(r,2))(reverse(l));; (* Not all abstractions make you look smart, however. This one is just plain silly: *) let threesum(list) = (* don't do this *) (fun l -> nth(reverse(l),0) + nth(reverse(l),1) + nth(reverse(l),2))(list);; (* This isn't much better: *) let threesum = (fun l -> nth(reverse(l),0) + nth(reverse(l),1) + nth(reverse(l),2));; (* So why bother with auxiliary functions at all? Can't we always use anonymous functions? (Yes / No) *) (* Be careful when you combine recursion with auxiliary functions, however. Bad idea: *) let rec sumcube n = let cube x = x * x * x in if n = 0 then 0 else cube n + sumcube (n-1);; (* cube is redefined at every level of recursion :-( Better idea: *) let sumcube n = let cube x = x * x * x in let rec sumc = function 0 -> 0 | m -> cube m + sumc (m-1) in sumc n;; (* In fact, even in: *) let foo(x) = x + (3*5);; (* 3*5 is recomputed in every application of foo. Better to use 15. To recapitulate, we've seen a number of uses of let so far: 1) to avoid recomputing a value - expression occurs multiple times in body - expression occurs once in body, but body is of recursive function 2) to encapsulate a method that we wish to hide from users of our function 3) to declare storage space that will be re-used between executions 4) to protect data that are about to be destroyed by a side-effect-producing function... POOR-MAN'S OBJECT ORIENTATION REDUX *) type stackop = push of int | pop | empty;; type stackres = pushed of int | popped of int | yes | no;; let stack = let s = ref [] in (function push(arg) -> (s := (arg::(!s)); pushed(arg)) | pop -> let t = (hd (!s)) in (s := (tl (!s)) ; popped(t)) | empty -> if (!s=[]) then yes else no );; stack (push(3));; stack (push(2));; stack (push(1));; stack empty;; stack pop;; stack pop;; stack pop;; stack empty;; stack pop;; (* To do better than that, we need our own exceptions. But let's first look at how nested lets are interpreted... LEXICAL SCOPE (STATIC SCOPE): When many let expressions are nested, a variable may be defined many times - the nearest *enclosing* let expression also determines the value: It is based on the principle that consistent renaming of variables should not effect the value of an expression. *) let y=2 in if (y > (let y=3 in y*y)) then y else -y;; let x = 2 in (let fn x = x + 1 in fn(x));; let y = 3 in (* defining context *) let fn(x) = x + y in let y = 2 in (* calling context *) fn(2);; (* Lexical scope goes a long way to establishing REFERENTIAL TRANSPARENCY - easy to determine where variables obtain their values, because program itself (unlike run-time contexts) is not changing. LISP does NOT have static scope. Scheme does. So does CaML. There's some historical evidence to suggest that in fact LISP's dynamic scoping was a bug that simply nobody wanted to admit to. The only circumstance in which dynamic scoping could be defended as reasonable is in macros and possibly function in-lining. OPERATOR PRECEDENCE: When we write f(x,y) as an application of function f to the pair (x,y), we say that this application has been written in *prefix* notation. An alternative is to use *infix* notation. Some of these are built in, e.g., 3 + 4 is an application of '+' to (3,4), but we write it between its arguments. You can define your own infix operators, too. let sub(x)(y) = x - y;; #infix "sub";; (* # means this is a directive - so this does not evaluate to a value *) 3 sub 4;; (* There are some restrictions on declared infix operators in CaML that are left unstated by the syntax. First, they must correspond to Curried binary functions - a product would be interpreted as one argument (left or right) here. Second, CaML assigns low precendence (loose binding) to all of your operators - the same as a comparator like = or <. You can't define an infix operator with the same precedence as multiplication or exponentiation, for example. Third, all of your infix operators are left-associative. *) 3 sub 4 sub 5;; (* means *) (3 sub 4) sub 5;; (* not *) 3 sub (4 sub 5);; (* Interesting fact: Some people even use *postfix* expressions, e.g., 3 4 +. All HP calculators (used to) do this. Some people call this "reverse Polish notation" (RPN). CaML doesn't support this, though.*) (* EXCEPTIONS The CaML type checker catches type errors, but there are other kinds of errors: *) let f(x) = 1 / x;; let g(x) = hd x;; let h(x) = tl x;; (* All of these are correct - CaML has no problem with their definitions. But errors can still occur at execution: *) f 0;; (* don't try this with floating point arithmetic! *) g [];; h [];; (* When argument(s) to function have correct type, but improper values, ML normally RAISES AN EXCEPTION. User-defined functions can do this too, and you can define your own exceptions. Example: *) exception NegArg;; let rec fact n = if n=0 then 1 else if n>0 then n * fact(n-1) else raise NegArg;; (* Note that exceptions do not affect the type of fact. *) fact -2;; (* Exceptions can have arguments, just like data-types: *) exception NegArg of int;; exception BadTuple of int*string;; raise (NegArg(-2));; raise (BadTuple(4,"abc"));; (* Raised exceptions can be CAUGHT by EXCEPTION HANDLERS that are waiting for them. Uncaught exceptions are reported by the top-level read-eval-print loop, after which computation halts. More on exception handlers below... *) (* Example: choose(n,m), i.e., ( n ) n! ( ) = --------- ( m ) m! (n-m)! There are three possible errors that we could witness with n,m:int 1) n < 0 2) m < 0 3) m > n *) exception Negative of int;; exception TooBig of int;; let choose(n,m) = if n<0 then raise (Negative(n)) else if m<0 then raise (Negative(m)) else if m>n then raise (TooBig(m)) else fact(n) / ( fact(m) * fact(n-m));; choose(6,3);; choose(2,3);; choose(4,-2);; fact(3*choose(2,4));; choose(7.5,6.0);; (* EXCEPTION HANDLING *) let mychoose(n,m) = try choose(n,m) with Negative(_) -> -1 | TooBig(_) -> 0;; mychoose(5,4);; mychoose(5,-4);; mychoose(2,3);; (* Alternatively, you can build error values into your datatypes: *) type response = val of int | error of int*string;; let mychoose(n,m) = try val(choose(n,m)) with Negative(x) -> error(x,"is negative") | TooBig(y) -> error(y,"is too big");; mychoose(5,4);; mychoose(5,-4);; mychoose(2,3);; (* SCOPE OF EXCEPTION HANDLERS: similar to scope of formal parameter binding -- narrowest containing handler takes control *) exception e1;; exception e2;; exception e3;; let h = function 1 -> raise e1 | 2 -> raise e2 | 3 -> raise e3 | _ -> "ok";; let g(n) = try h(n) with e2 -> "error g2" | e3 -> "error g3";; let fn(n) = try g(n) with e1 -> "error f1" | e2 -> "error f2";; fn 0;; fn 1;; fn 3;; fn 2;;