PoML Tutorial

=programming

This is a tutorial for PoML (Polymorphic ML, pronounced "pommel") which is a programming language (targeting LLVM bitcode) that doesn't quite exist yet.

// This is a single-line comment.

//.

This is a comment block.

//. These can be nested, too. .//

.//

// This is a statement. It says that a variable "x" exists with the value of 5. Statements end with whitespace then a period.

x = 5 .

// Variable names are case sensitive for the first character of the name, and case-insensitive for the rest.

// This is a command. It tells the computer to set the value of x to 7. Commands end with a semicolon.

x << 7;

// This is a tuple. It stores a fixed number of things in a single variable.

my_tuple = 1, 2.2, "three" .

// "Show" is a function that converts its input to a string, adds a newline, and prints that to stdout.

Show my_tuple.2; // prints the third element of the tuple: "three"

// This is a record.

// Some code goes inside the record, and all the newly defined variables at the end become values in it.

my_record = {a = "first" . b = 2 . c = 3 .} .

Show my_record.c; // prints "3"

// Suppose we want to create an "object" that contains an int and a function that doubles and returns that int.

// We can do that with a function that creates a record:

NewIntObject () =

{

a = 1 .

GetVal () = a << a*2; a .

}

IntObj = NewIntObject() .

x = IntObj.GetVal() .

// This is a string.

s = "text" .

// Code can be put in a string using []. The result is automatically converted to a string.

x = 10 .

y = 20 .

s = "x is [x] and y is [y]" .

Show s; // prints "x is 10 and y is 20"

// Special symbols can be escaped with \.

s = "use \\ to escape \" and \[" .

// Strings defined with !" can contain special symbols and matching !""! pairs.

s = !"This string (defined with !"[string]"!) does not require escaping " and does not execute code in []."! .

// This is an array.

my_array = [1; 2; 3; 4; 5] .

// This selects the first element, converts it to a string, and prints that.

Show my_array.[0]; // prints "1"

// The "." is there to clarify that this is indexing my_array, not defining a new array [0].

// Arrays can also be declared without filling them, using a type annotation:

declared_array is int[10] .

// Suppose we want to make a size 100 array, filled with the letter "a".

// We can do that with a function Make in Array:

a_array = Array.Make 100 "a" .

// Array is a record that contains functions for arrays. It's included in the default library.

// array.len gives the length of an array:

length = a_array.len .

// If you want to write "Make" instead of "Array.Make" you can open Array.

// This brings everything in Array into the current namespace.

open Array;

b_array = Make 100 "b" .

// Any record can be opened. Let's open my_record:

open my_record;

Show a; // prints "first"

// We're done with those records now, so we can close them:

close Array;

close my_record;

// These commands only open or close records within the current lexical scope.

// This is a function that outputs (its input + 1).

AddOne x = x + 1 .

// By convention, functions use CamelCase names, and data uses names_with_underscores.

Show (AddOne 5); // prints "6"

// Functions are "pass by reference" so this function adds 1 to its input:

Increment x = x << x+1 .

// Maybe we want our function to do something different when the input is a string.

// We can define a different function for string inputs, like this:

maybe AddOne (s is string) = s & " one" . // & is used to concatenate strings together.

Show (AddOne "number"); // prints "number one"

// This is an equivalent way of assigning multiple definitions to AddOne:

Increment x = x + 1 .

AppendOne (s is string) = s & " one" .

AddOne = maybe Increment maybe AppendOne .

// Functions always use the first such "maybe" definition that accepts the input types.

// Like in OCaml, recursive function definitions must start with rec.

// This is an infinite loop that repeatedly adds 1 to its input:

rec IncrementForever x = IncrementForever (x+1) .

// Functions can be chained together using " ' ".

0 ' AddOne ' AddOne ' AddOne ' Show; // prints "3"

// "()" is nil, which does nothing.

// If you want to call a function that takes no inputs, you use nil as a "fake" input.

ReturnTen () = 10 .

x = ReturnTen .

Show x . // prints "x is () -> int" which is the type signature of ReturnTen.

Show(ReturnTen()); // prints "10"

nil = () .

x = ReturnTen nil .

Show x . // also prints "10"

LAMBDA & PATTERN MATCHING

// We can define anonymous functions ("lambdas"), and those can be assigned to variables.

// The following 2 definitions are equivalent:

AddOne x = x + 1 .

AddOne = \x: x+1 .

// Lambda definitions can be ended by a "." matching a definition before they start, a "]" to explicitly end them, or by enclosing them in ().

// \\ is equivalent to \ but must be the first match of a lambda, so \\ can start a nested lambda.

// Constants can also be matched against:

rec factorial =

\0 \1 : 1

\n: n * factorial(n-1)

// This construct can also apply different cases to variant types.

// Suppose we have an enum:

type MyEnum = One | Two | Three .

// We can do different things depending on the enum:

PrintMyEnum =

\One: Show "First";

\Two: Show "Second";

\: Show "Third"; // The \: at the end indicates a default option.

PrintMyEnum Two; // prints "Second"

// As shorthand, when a lambda does not contain ":" and contains some combination of "x", "y", "z" it binds inputs to those letters.

GreaterThanTwo = \x: x > 2] .

GreaterThanTwo = \x > 2] .

// These definitions are equivalent.

// (: body) is shorthand for (\(): body)

b = (: x > 2)

b = (\(): x > 2)

// These expressions are equivalent.

// Fallthrough of cases in a lambda can be done explicitly using ->:

FizzBuzz n = (n mod 3 == 0), (n mod 5 == 0) '

\(true, _) as input: Show "Fizz"; input -> // input goes to the first matching case below this one

\_, true: Show "Buzz";

\false, false: Show "[n]";

];

Show "\n";

ARRAY FUNCTIONS

// Suppose we want to add one to every element in an array.

// We can do that with Iter (iterate):

Iter input_array \x << x+1] ; // adds 1 to every element of an array

// If we instead want a new array that's changed, we can use Map:

new_array = Map input_array \x+1] .

// Map and Iter can take a tuple of any number of arrays, if you give them a function that takes multiple inputs:

sum_array = Map (array_1, array_2) \x+y] .

// sum_array is as long as the shortest input array.

// Suppose we want "+" to also add 2 arrays together. We can define that with:

maybe (+) a b = Map (a,b) (+) .

// The (+) makes the operator "+" be treated as a regular function.

// (This overloading for + is included in the standard library.)

// true is a boolean value.

// AllTrue checks if a function returns true for every element in an array or tuple of arrays.

name = "test string" .

banned_strings = ["foo"; "bar"] .

result = banned_strings ' AllTrue \s: name != s] . // result is true

// To filter an array, we can use Filter with a function that takes an element and returns a bool.

filtered_array = Filter input_array \x>0] .

//.

Filter uses array slices.

An array value contains a pointer and a length.

An array slice uses different pointer and length values to point at part of an array.

So, Filter allocates an array of the same size as the input, and returns a slice that might be shorter.

.//

// You can create array slices as follows:

array_slice = input_array.[start, end] .

// "end" inside an array index is the index of the last element.

// So, this creates a slice of everything except the first and last elements:

slice = input_array.[1, end-1] .

// You may want to copy the filtered array slice into a smaller array, so the larger array can be garbage collected.

// Copy is the universal copy function in PoML.

filtered_array = Copy(Filter input_array \x>0]) .

// Equivalently, that can instead be written as a chain:

filtered_array = input_array ' Filter \x>0] ' Copy .

// Map and Iter can accept a function of an integer as any input except the first and last.
// Those inputs are treated as an array with values of function(index).

output_array = Map (input_array, \x]) \x+y] .

// output_array has the index added to each element of input_array

// Suppose we want a more advanced data structure that acts like an array but allows fast insertions and deletions.

// We can convert a basic array into a dynamic array:

dynamic_array = DyAr [0; 1; 2; 3] .

first = dynamic_array.[0] .

// Why does the same access syntax work for data structures that may be defined in libraries?

// The compiler actually translates that to a special function Dot:

first = Dot dynamic_array (0) .

// How about assignments to that? The compiler does the following series of transforms:

dynamic_array.[0] << 20;

Dot dynamic_array (0) << 20;

Assign dynamic_array (0) 20;

// Because << causes this kind of transformation, << has a unique type and thus cannot be conditionally returned by a function.

// In other words, functions must either never return << or always return it.

CONDITIONALS

// PoML's equivalent of "if" statements has the following structure:

condition_a:

action_1;

| condition_b: // optional "else"

action_2;

| condition_c: // there can be any number of these

action_3;

|: // optional default case

default_action;

] // end

// conditionals can also return values:

b = true .

bool_text = b: "true" |: "false" .

// Here is a factorial function using this:

rec factorial n =

n==0 || n==1 : 1

|: n * factorial(n-1)

. // The "]" to end the conditional is implied when a definition containing it ends.

THROW & GOTO

// throw goes downward until a matching catch is found.

throw #print_this x; // # marks an exception type

Show "This text is skipped."; // this line is not executed

catch print_this x: Show x;; // catch statements end with ";;"

// This prints "22".

// goto is like throw, but it goes up instead of down.

// So, this is an infinite loop:

catch 10;;

Show "LOOK AROUND YOU";

goto #10;

// The target of jumps cannot have variables in scope that the origin does not.

// If there are any such variables, that's an invalid jump that gives a compiler error.

// Exceptions must be caught, or else the compiler gives an error.

// If you want to crash the program, use fail instead.

fail; // crashes the program and prints the location of this fail in the source code

fail "error message"; // crashes the program and prints "error message"

LOOPS

// for and while loops are functions defined using goto. The C-style for loop uses this definition:

for n test update body =
    i = n .
    catch loop_start;;
    test i:
        body i;
        update i;
        goto #loop_start;
    .

// This prints numbers from 0 to 9:

for 0 \x<10] (++) \i:     // In C, this would be: for (int i=0; i<10; i++)
    Show i;
    ];

// for can also take a single input, and repeat that many times. Here's that definition:

maybe for n body =
    i = n .
    catch loop_start;;
    i != 0:
        body();
        i--;
        goto #loop_start;
    .

// Here is an example of that usage:

x = 0 .

for 10 (:

x << x + 2;

);

Show x; // prints "20"

// Here is an equivalent way of writing that loop:

x = 0 .

body() = x << x + 2 .

for 10 body;

Show x;

// Here is a for loop that prints 99 Bottles of Beer:

ShowLine n = Show "[n] bottles of beer on the wall. Take one down, pass it around,\n" .
for 99 \x>1] (--) ShowLine;
Show "1 bottle of beer on the wall. Take it down, pass it around, no more bottles of beer.\n";

// Here is the while loop definition:

while test body =
    catch loop_start;;
    test():
        body();
        goto #loop_start;
    .

// Here is a while loop example, which prints 0 to 9:

i = 0 .

while (:i<10) (:

Show i;

i++;

);

// Here is an equivalent way of writing that loop:

i = 0 .

test() = i<10 .

body() =

Show i;

i++;

while test body;

// throw cannot jump into a named function, but can jump into a lambda or an if clause when no local variables are defined above the catch.

// That makes low-level optimizations such as Duff's device possible:

n = count / 4 .

remainder = count % 4 .

throw [#; #1; #2; #3].[remainder]; // a # does nothing when thrown

for n \:

f data;

catch 3;;

f data;

catch 2;;

f data;

catch 1;;

f data;

];

// The above applies f to data for count times, with an unrolled loop.

VARIANT TYPES

// Enums with multiple values are defined using "|" like this:

type MyEnum = One | Two | Three .

// If we want to represent a tree of strings, we can use a recursive type:

type StringTree = Node (StringTree[]) | Leaf string .

// That says a StringTree element can either be a Leaf with a string OR a Node with a StringTree array.

// The following lines all define StringTree values.

leaf = Leaf "text" .

node = Node [leaf, leaf, leaf] .

string_tree = ["a"; ["b"; "c"]; "d"] is StringTree .

// Declaring a variant type for the array first checks for a matching StringTree constructor.

// When that fails, it checks for a matching StringTree constructor for each element. This is done recursively while it fails.

// When multiple constructors match, the first constructor in the definition order is used.

// We can process a StringTree using pattern matching:

rec PrintStringTree =

\Leaf s: Show s;

\Node n: Iter n PrintStringTree;

// That's a recursive function that prints all the strings in a StringTree.

// Variant types can also be used to create mixed arrays:

type IntOrString = Int int | String string .

mixed_array = [1, 2, 3, "a", "b", "c"] is IntOrString[] .

POINTERS & GC

// Pointers are created with $ and dereferenced with @.

p = $20 .

Show p; // prints "int ref"

Show @p; // prints "20"

// PoML uses reference counting for garbage collection: data keeps a counter of how many pointers to it exist.

// Everything having a counter is inefficient, so only specially marked data has a reference counter.

// Only that marked data can have pointers to it copied.

// Suppose you have an array of strings. Each string in a is a reference to a character array.

a = Array.Make 100 "some string" .

// The array is garbage collected when a goes out of scope.

// Now we assign the array pointer to a new variable.

b = a .

// This is not a problem: the array is collected when both a and b are out of scope.

// Next, we assign one to a variable.

s = a.[n] .

// Now s is holding a reference to one of them, preventing garbage collection.

// If s goes out of scope before a or at the same time, then there is still no problem.

// But we might want to collect the rest of a before s.

// So, to prevent memory leaks, that's a compiler error if a goes out of scope before s does.

// To avoid the compiler error, the strings in a must all be marked as reference-counted using a special rc type annotation.

a = Array.Make 100 ("some string" is rc) .

// Now there's no problem with storing strings from a in a variable.

s = a[n] .

// What if we want to use circular references? For example, consider a pair of arrays A and B sometimes containing pointers to each other's elements.

// In that case, we need to contain A and B in a single object containing immutable pointers to both:

root_object = ($A is const, $B is const) is rc .

// That causes A and B to be reference counted and garbage collected together.

See the garbage collection implementation page for more details on this.

OPERATORS

// Users can define custom operators, which can be binary or unary.

// Here's a definition for C's "+=" operator.

(+=) x y = x << x+y .

// Unary operators are applied before functions, and cannot have whitespace between them and their variable.

// Here's an increment operator:

(++) x = x << x+1 .

// That's used after a variable, because it doesn't return a value:

x++;

// Unary operators that return a value are instead used before the variable:

(<+>) x = x + 1 .

x_plus_1 = <+>x .

TYPES

// Multiple function arguments are converted into a tuple, so the following 2 function definitions are equivalent:

let f x y z = 2*x*y*z;
let f (x, y, z) = 2*x*y*z;

// Those both have the type signature:

f is (int, int, int) -> int

// Currying of functions is not allowed, because it complicates typechecking and makes code harder to read. Instead, you should define a new function with constants for some parameters.

// Variables have a base type, which indicates their actual representation in a computer, and can also have tags, which indicate what overloaded functions can or should be used for them.

// For example, a string is a byte array that's tagged string. Here's the type definition for strings:

type string = byte[] +string .

// Type definitions can include a list of tags to give the base type.

// Type annotations can use +required_tag to require a tag, -banned_tag to exclude banned_tag, and (+tag1 | +tag2) to work with either tag1 OR tag2.

// const is a special type tag that disallows mutation.

x = 10 is const . // the value of this x cannot be modified