(* PATTERNS CaML allows you to match arguments with *patterns* - structured descriptions of objects containing variables that are bound to remaining pieces of structure. Example: *) let rec fib(n) = if (n=0) then 0 else if (n=1) then 1 else fib(n-1) + fib(n-2);; (* This is a pain. We have already seen match statements, but you can do a lot more than just guess at the literal or match anything with "_". A good pattern in this case employs a variable which is then bound to the argument *) let rec fib(n) = match n with 0 -> 0 | 1 -> 1 | m -> fib(m-1)+fib(m-2);; (* Here, m matches whatever n might be if the match reaches the last case. CaML has a more convenient syntax, however, which reflects the close association between matches and lambda-terms: *) let rec fib = function 0 -> 0 | 1 -> 1 | n -> fib(n-1) + fib(n-2);; (* In this way, the function itself is defined in cases. Another example: *) let null(l) = (l=[]);; let rec listsum l = if (null l) then 0 else (hd l) + listsum (tl l);; listsum [1;2;3];; (* OR *) let rec listsum = function [] -> 0 | l -> (hd l) + listsum (tl l);; (* But this is just the beginning - patterns can be much more complex: *) let rec listsum = function [] -> 0 | (h::t) -> h + listsum t;; (* And look what else CaML does! *) let rec reverse (h::t) = reverse(t) @ [h];; (* Very useful for debugging... *) let rec reverse = function [] -> [] | (h::t) -> reverse(t) @ [h];; (* Here's a clever idea that really benefits from patterns: *) let median x = let rec medhelp = function ((x1::x2::xt),(y1::yt)) -> medhelp(xt,yt) | (_,(median::_)) -> median (* traverse twice as fast through one list *) in medhelp(x,x);; median [1;2;3];; median [1;2;3;4];; median [];; (* WHY BOTHER WITH TYPING: - easier to debug programs: compiler can catch many errors: clashes, misspellings ... - efficiency: typing can be used for "static analysis" in compiler to generate quicker code - documentation: types declare your intent with well-chosen names - overloading/redundancy: also indicates a relevant abstraction Some languages' type systems operate at compile-time, e.g., CaML, Ada Others operate at run-time, e.g., dynamic dispatch for virtual methods in C++ Typing at compile-time is called *static typing* Explicit static typing: code contains declarations, e.g. C: int x,y,z; But it's also possible to infer types: a3 := b4 + 1; what could a3 and b4 be? We can tell because of + and 1! if (test) then ... what is test? Must be bool! This is called *type inferencing* Sound type system: a type system in which all types can always be inferred in any valid program. CaML has an almost sound type system: = is overloaded, some defective programs with exceptions and no real results don't have inferrable result types. RECORD TYPES We have already seen tuples: *) (1,2,"abc");; (* These are useful because they are flat and heterogeneous. But you can also name the different dimensions: *) type professor = {name:string;age:int;salary:int};; {name="gerald penn";age=40;salary=1000000};; (* This is a RECORD (INSTANCE), a generalization of tuples, and it has a RECORD TYPE. {name="gerald penn";age=36;salary=1000000}.salary;; let g = {name="gerald penn";age=36;salary=1000000};; g.salary;; {name="gerald penn";age=36}.salary;; (* Tuples can be thought of as a shorthand for records with unnamed fields, although CaML won't actually let you define records that way. Records are generallly preferable to tuples in that they are: 1) order-independent: now that we've named our components, we can state them in any order. 2) named: these names tell others how we're interpreting the dimensions. This makes our source-code easier to read and understand.*) (* TYPE SYNONYMS *) type waitress = {name:string; age:int; salary:int; tips:int};; (* Expressions like this define new types. They are called *type definitions*. The type didn't exist before, because CaML didn't know that the attributes name, age, salary and tips were allowed to be combined beforehand. On other occasions, however, we just want a new name for a type that already exists: *) type waitresses == waitress list;; type professor == string * int * int;; (* *Type synonyms* are important, not just because they're shorter, but because they document your intentions - giving your types very descriptive names makes your source-code easier for other people (and you) to interpret. Synonyms can be used wherever built-in types can: *) let income (w:waitress) = w.salary + w.tips;; let income (w) = w.salary + w.tips;; let rec total = function ([]:waitresses) -> 0 | (w::wlist) -> income(w) + total(wlist);; let (f:waitress) = {name="flo";age=45;salary=20000;tips=5000};; (* remember: type judgements must be parenthesized *) (* On the other hand, patterns can also be used with records: *) let income {name=_;age=_;salary=s;tips=t} = s+t;; income f;; (* vs. *) let income {salary=s;tips=t} = s+t;; (* record patterns are not nearly as picky as records in CaML - we can drop unused attribute names, which makes it easier to program with large records. *) (* FUNCTION POLYMORPHISM: values (including variables or functions) that can have more than one type *) let rec length l = if (null l) then 0 else 1 + length (tl l);; let rec reverse = function [] -> [] | (h::t) -> reverse(t) @ [h];; let listify x = [x];; let apply (f,x) = (f x);; apply(float_of_int,5);; (* Without polymorphism, we would need many functions: int-length, int-reverse, float-length, float-reverse, etc. There are three kinds of polymorphism: AD-HOC POLYMORPHISM: aka. overloading, different operations known by same name, which compiler/interpreter must resolve. INHERITANCE-BASED POLYMORPHISM: subclasses define new versions of methods possessed by superclass. OO languages use this a lot. public class Employee{ public int salary; public void income() = {return salary;} } public class Waitress extends Employee{ public int tips; public void income() = {return (salary + tips);} public class Professor extends Employee; PARAMETRIC POLYMORPHISM: types/type variables explicitly used as parameters heterogeneous (C++): one copy of function code per instantiation of parms homogeneous (CaML): one copy of code only - parameters used internally Parameters can also be used in type synonyms: *) type 'a pair == 'a * 'a;; ((1,2): int pair);; type 'a func == 'a -> 'a;; let square(x) = x *. x;; (square: float func);; (* Certain operators restrict polymorphism: arithmetic: +,-,* vs. +.,-.,*. division-related: mod (in)equality: <, >, =, (but =., <., >. do exist) Boolean connectives: and, or, etc. string operators: ^ type conversion: float_of_int, string_of_int, round_num Others do not, e.g., tuples, list operators. *) (* TYPE DEFINITIONS *) type pound == float;; type kg == float;; let pound_of_kg (n:kg) = ((n *. 2.2): pound);; pound_of_kg (1.0);; pound_of_kg (1.0:pound);; pound_of_kg (pound_of_kg (1.0));; (* What's wrong here? kg and pound are SYNONYMS for float - so CaML doesn't really care which we use. (* New types can be DEFINED using explicit TYPE CONSTRUCTORS; *) type pound = lbs of float;; type kilogram = kg of float;; lbs 3.0;; kg 3.0;; let pound_of_kg (kg x) = lbs (x*.2.2);; pound_of_kg(1.0);; pound_of_kg(kg 1.0);; pound_of_kg(lbs 1.0);; pound_of_kg(pound_of_kg(kg 1.0));; (* There can be more than 1 constructor too: *) type money = coins of int | notes of int | cheque of string * float;; cheque("cibc",100.70);; (* money is called a *variant type*. *) (* In function definitions, pattern-matching then distinguishes among them: *) let amount = function (coins n) -> float_of_int(n)/.100.0 | (notes n) -> float_of_int(n) | (cheque (_,n)) -> n;; (* Note the syntactic parallelism between variant type definitions and matching patterns in function definitions. One exists in the world of types, the other, in the world of objects. We can use variant types to better cope with the homogeneity of CaML's polymorphic type system: *) [1;2.0;3.5;4];; (* Ha! No way *) type number = f of float | i of int;; [i 1;f 2.0;f 3.5;i 4];; let rec sumlist = function [] -> 0.0 | ((f x)::t) -> x +. sumlist(t) | ((i x)::t) -> float_of_int(x) +. sumlist(t);; sumlist [i 1;f 2.0;f 3.5;i 4];; (* User-defined datatypes can be recursive: *) type tree = leaf of int | node of tree * tree;; let rec sumtree = function (leaf n) -> n | (node(t1,t2)) -> sumtree(t1) + sumtree(t2);; let t = node(leaf(3),node(leaf(2),leaf(5)));; sumtree t;; (* User-defined types can also be parametrically polymorphic: *) type 'a tree = leaf of 'a | node of ('a tree) * ('a tree);; leaf(2);; node(leaf("ab"),leaf("cd"));; node(leaf(2),leaf("cd"));; (* what's wrong? *) let rec count = function (leaf _) -> 1 | (node (t1,t2)) -> count t1 + count t2;;