=programming
This is a tutorial for PoML (Polymorphic ML, pronounced "pommel") which is a programming language (targeting LLVM bitcode) that doesn't quite exist yet.
// This is a single-line comment.
//.
This is a comment block.
//. These can be nested, too. .//
.//
// This is a statement. It says that a variable "x" exists with the value of 5. Statements end with whitespace then a period.
x = 5 .
// Variable names are case sensitive for the first character of the name, and case-insensitive for the rest.
// This is a command. It tells the computer to set the value of x to 7. Commands end with a semicolon.
x << 7;
// This is a tuple. It stores a fixed number of things in a single variable.
my_tuple = 1, 2.2, "three" .
// "Show" is a function that converts its input to a string, adds a newline, and prints that to stdout.
Show my_tuple.2; // prints the third element of the tuple: "three"
// This is a record.
// Some code goes inside the record, and all the newly defined variables at the end become values in it.
my_record = {a = "first" . b = 2 . c = 3 .} .
Show my_record.c; // prints "3"
// Suppose we want to create an "object" that contains an int and a function that doubles and returns that int.
// We can do that with a function that creates a record:
NewIntObject () =
{
a = 1 .
GetVal () = a << a*2; a .
}
IntObj = NewIntObject() .
x = IntObj.GetVal() .
// This is a string.
s = "text" .
// Code can be put in a string using []. The result is automatically converted to a string.
x = 10 .
y = 20 .
s = "x is [x] and y is [y]" .
Show s; // prints "x is 10 and y is 20"
// Special symbols can be escaped with \.
s = "use \\ to escape \" and \[" .
// Strings defined with !" can contain special symbols and matching !""! pairs.
s = !"This string (defined with !"[string]"!) does not require escaping " and does not execute code in []."! .
// This is an array.
my_array = [1; 2; 3; 4; 5] .
// This selects the first element, converts it to a string, and prints that.
Show my_array.[0]; // prints "1"
// The "." is there to clarify that this is indexing my_array, not defining a new array [0].
// Arrays can also be declared without filling them, using a type annotation:
declared_array is int[10] .
// Suppose we want to make a size 100 array, filled with the letter "a".
// We can do that with a function Make in Array:
a_array = Array.Make 100 "a" .
// Array is a record that contains functions for arrays. It's included in the default library.
// array.len gives the length of an array:
length = a_array.len .
// If you want to write "Make" instead of "Array.Make" you can open Array.
// This brings everything in Array into the current namespace.
open Array;
b_array = Make 100 "b" .
// Any record can be opened. Let's open my_record:
open my_record;
Show a; // prints "first"
// We're done with those records now, so we can close them:
close Array;
close my_record;
// These commands only open or close records within the current lexical scope.
// This is a function that outputs (its input + 1).
AddOne x = x + 1 .
// By convention, functions use CamelCase names, and data uses names_with_underscores.
Show (AddOne 5); // prints "6"
// Functions are "pass by reference" so this function adds 1 to its input:
Increment x = x << x+1 .
// Maybe we want our function to do something different when the input is a string.
// We can define a different function for string inputs, like this:
maybe AddOne (s is string) = s & " one" . // & is used to concatenate strings together.
Show (AddOne "number"); // prints "number one"
// This is an equivalent way of assigning multiple definitions to AddOne:
Increment x = x + 1 .
AppendOne (s is string) = s & " one" .
AddOne = maybe Increment maybe AppendOne .
// Functions always use the first such "maybe" definition that accepts the input types.
// Like in OCaml, recursive function definitions must start with rec.
// This is an infinite loop that repeatedly adds 1 to its input:
rec IncrementForever x = IncrementForever (x+1) .
// Functions can be chained together using " ' ".
0 ' AddOne ' AddOne ' AddOne ' Show; // prints "3"
// "()" is nil, which does nothing.
// If you want to call a function that takes no inputs, you use nil as a "fake" input.
ReturnTen () = 10 .
x = ReturnTen .
Show x . // prints "x is () -> int" which is the type signature of ReturnTen.
Show(ReturnTen()); // prints "10"
nil = () .
x = ReturnTen nil .
Show x . // also prints "10"
LAMBDA & PATTERN MATCHING
// We can define anonymous functions ("lambdas"), and those can be assigned to variables.
// The following 2 definitions are equivalent:
AddOne x = x + 1 .
AddOne = \x: x+1 .
// Lambda definitions can be ended by a "." matching a definition before they start, a "]" to explicitly end them, or by enclosing them in ().
// \\ is equivalent to \ but must be the first match of a lambda, so \\ can start a nested lambda.
// Constants can also be matched against:
rec factorial =
\0 \1 : 1
\n: n * factorial(n-1)
.
// This construct can also apply different cases to variant types.
// Suppose we have an enum:
type MyEnum = One | Two | Three .
// We can do different things depending on the enum:
PrintMyEnum =
\One: Show "First";
\Two: Show "Second";
\: Show "Third"; // The \: at the end indicates a default option.
.
PrintMyEnum Two; // prints "Second"
// As shorthand, when a lambda does not contain ":" and contains some combination of "x", "y", "z" it binds inputs to those letters.
GreaterThanTwo = \x: x > 2] .
GreaterThanTwo = \x > 2] .
// These definitions are equivalent.
// (: body) is shorthand for (\(): body)
b = (: x > 2)
b = (\(): x > 2)
// These expressions are equivalent.
// Fallthrough of cases in a lambda can be done explicitly using ->:
FizzBuzz n = (n mod 3 == 0), (n mod 5 == 0) '
\(true, _) as input: Show "Fizz"; input -> // input goes to the first matching case below this one
\_, true: Show "Buzz";
\false, false: Show "[n]";
];
Show "\n";
.
ARRAY FUNCTIONS
// Suppose we want to add one to every element in an array.
// We can do that with Iter (iterate):
Iter input_array \x << x+1] ; // adds 1 to every element of an array
// If we instead want a new array that's changed, we can use Map:
new_array = Map input_array \x+1] .
// Map and Iter can take a tuple of any number of arrays, if you give them a function that takes multiple inputs:
sum_array = Map (array_1, array_2) \x+y] .
// sum_array is as long as the shortest input array.
// Suppose we want "+" to also add 2 arrays together. We can define that with:
maybe (+) a b = Map (a,b) (+) .
// The (+) makes the operator "+" be treated as a regular function.
// (This overloading for + is included in the standard library.)
// true is a boolean value.
// AllTrue checks if a function returns true for every element in an array or tuple of arrays.
name = "test string" .
banned_strings = ["foo"; "bar"] .
result = banned_strings ' AllTrue \s: name != s] . // result is true
// To filter an array, we can use Filter with a function that takes an element and returns a bool.
filtered_array = Filter input_array \x>0] .
//.
Filter uses array slices.
An array value contains a pointer and a length.
An array slice uses different pointer and length values to point at part of an array.
So, Filter allocates an array of the same size as the input, and returns a slice that might be shorter.
.//
// You can create array slices as follows:
array_slice = input_array.[start, end] .
// "end" inside an array index is the index of the last element.
// So, this creates a slice of everything except the first and last elements:
slice = input_array.[1, end-1] .
// You may want to copy the filtered array slice into a smaller array, so the larger array can be garbage collected.
// Copy is the universal copy function in PoML.
filtered_array = Copy(Filter input_array \x>0]) .
// Equivalently, that can instead be written as a chain:
filtered_array = input_array ' Filter \x>0] ' Copy .
//
Map and Iter can accept a function of an integer as any input except the
first and last.
// Those inputs are treated as an array with values of
function(index).
output_array = Map (input_array, \x]) \x+y] .
// output_array has the index added to each element of input_array
// Suppose we want a more advanced data structure that acts like an array but allows fast insertions and deletions.
// We can convert a basic array into a dynamic array:
dynamic_array = DyAr [0; 1; 2; 3] .
first = dynamic_array.[0] .
// Why does the same access syntax work for data structures that may be defined in libraries?
// The compiler actually translates that to a special function Dot:
first = Dot dynamic_array (0) .
// How about assignments to that? The compiler does the following series of transforms:
dynamic_array.[0] << 20;
Dot dynamic_array (0) << 20;
Assign dynamic_array (0) 20;
// Because << causes this kind of transformation, << has a unique type and thus cannot be conditionally returned by a function.
// In other words, functions must either never return << or always return it.
CONDITIONALS
// PoML's equivalent of "if" statements has the following structure:
condition_a:
action_1;
| condition_b: // optional "else"
action_2;
| condition_c: // there can be any number of these
action_3;
|: // optional default case
default_action;
] // end
// conditionals can also return values:
b = true .
bool_text = b: "true" |: "false" .
// Here is a factorial function using this:
rec factorial n =
n==0 || n==1 : 1
|: n * factorial(n-1)
. // The "]" to end the conditional is implied when a definition containing it ends.
THROW & GOTO
// throw goes downward until a matching catch is found.
throw #print_this x; // # marks an exception type
Show "This text is skipped."; // this line is not executed
catch print_this x: Show x;; // catch statements end with ";;"
// This prints "22".
// goto is like throw, but it goes up instead of down.
// So, this is an infinite loop:
catch 10;;
Show "LOOK AROUND YOU";
goto #10;
// The
target of jumps cannot have
variables in scope that the origin
does not.
// If there are any such variables, that's an invalid jump that gives a compiler error.
// Exceptions must be caught, or else the compiler gives an error.
// If you want to crash the program, use fail instead.
fail; // crashes the program and prints the location of this fail in the source code
fail "error message"; // crashes the program and prints "error message"
LOOPS
// for and while loops are functions defined using goto. The C-style for loop uses this definition:
for n test update body =
i = n .
catch loop_start;;
test i:
body i;
update i;
goto #loop_start;
.
// This prints numbers from 0 to 9:
for 0 \x<10] (++) \i:
// In C, this would be: for (int i=0; i<10; i++)
Show i;
];
// for can also take a single input, and repeat that many times. Here's that definition:
maybe for n body =
i = n .
catch loop_start;;
i
!= 0:
body();
i--;
goto #loop_start;
.
// Here is an example of that usage:
x = 0 .
for 10 (:
x << x + 2;
);
Show x; // prints "20"
// Here is an equivalent way of writing that loop:
x = 0 .
body() = x << x + 2 .
for 10 body;
Show x;
// Here is a for loop that prints 99 Bottles of Beer:
ShowLine n = Show "[n] bottles of
beer on the wall. Take one down, pass it around,\n" .
for 99 \x>1] (--)
ShowLine;
Show "1 bottle of beer on the wall. Take it down, pass it
around, no more bottles of beer.\n";
// Here is the while loop definition:
while test body =
catch loop_start;;
test():
body();
goto #loop_start;
.
// Here is a while loop example, which prints 0 to 9:
i = 0 .
while (:i<10) (:
Show i;
i++;
);
// Here is an equivalent way of writing that loop:
i = 0 .
test() = i<10 .
body() =
Show i;
i++;
.
while test body;
// throw cannot jump into a named function, but can jump into a lambda or an if clause when no local variables are defined above the catch.
// That makes low-level optimizations such as Duff's device possible:
n = count / 4 .
remainder = count % 4 .
throw [#; #1; #2; #3].[remainder]; // a # does nothing when thrown
for n \:
f data;
catch 3;;
f data;
catch 2;;
f data;
catch 1;;
f data;
];
// The above applies f to data for count times, with an unrolled loop.
VARIANT TYPES
// Enums with multiple values are defined using "|" like this:
type MyEnum = One | Two | Three .
// If we want to represent a tree of strings, we can use a recursive type:
type StringTree = Node (StringTree[]) | Leaf string .
// That says a StringTree element can either be a Leaf with a string OR a Node with a StringTree array.
// The following lines all define StringTree values.
leaf = Leaf "text" .
node = Node [leaf, leaf, leaf] .
string_tree = ["a"; ["b"; "c"]; "d"] is StringTree .
// Declaring a variant type for the array first checks for a matching StringTree constructor.
// When that fails, it checks for a matching StringTree constructor for each element. This is done recursively while it fails.
// When multiple constructors match, the first constructor in the definition order is used.
// We can process a StringTree using pattern matching:
rec PrintStringTree =
\Leaf s: Show s;
\Node n: Iter n PrintStringTree;
.
// That's a recursive function that prints all the strings in a StringTree.
// Variant types can also be used to create mixed arrays:
type IntOrString = Int int | String string .
mixed_array = [1, 2, 3, "a", "b", "c"] is IntOrString[] .
POINTERS & GC
// Pointers are created with $ and dereferenced with @.
p = $20 .
Show p; // prints "int ref"
Show @p; // prints "20"
// PoML uses reference counting for garbage collection: data keeps a counter of how many pointers to it exist.
// Everything having a counter is inefficient, so only specially marked data has a reference counter.
// Only that marked data can have pointers to it copied.
// Suppose you have an array of strings. Each string in a is a reference to a character array.
a = Array.Make 100 "some string" .
// The array is garbage collected when a goes out of scope.
// Now we assign the array pointer to a new variable.
b = a .
// This is not a problem: the array is collected when both a and b are out of scope.
// Next, we assign one to a variable.
s = a.[n] .
// Now s is holding a reference to one of them, preventing garbage collection.
// If s goes out of scope before a or at the same time, then there is still no problem.
// But we might want to collect the rest of a before s.
// So, to prevent memory leaks, that's a compiler error if a goes out of scope before s does.
// To avoid the compiler error, the strings in a must all be marked as reference-counted using a special rc type annotation.
a = Array.Make 100 ("some string" is rc) .
// Now there's no problem with storing strings from a in a variable.
s = a[n] .
// What if we want to use circular references? For example, consider a pair of arrays A and B sometimes containing pointers to each other's elements.
// In that case, we need to contain A and B in a single object containing immutable pointers to both:
root_object = ($A is const, $B is const) is rc .
// That causes A and B to be reference counted and garbage collected together.
See the garbage collection implementation page for more details on this.
OPERATORS
// Users can define custom operators, which can be binary or unary.
// Here's a definition for C's "+=" operator.
(+=) x y = x << x+y .
// Unary operators are applied before functions, and cannot have whitespace between them and their variable.
// Here's an increment operator:
(++) x = x << x+1 .
// That's used after a variable, because it doesn't return a value:
x++;
// Unary operators that return a value are instead used before the variable:
(<+>) x = x + 1 .
x_plus_1 = <+>x .
TYPES
// Multiple function arguments are converted into a tuple, so the following 2 function definitions are equivalent:
let f x y z = 2*x*y*z;
let f (x, y, z) = 2*x*y*z;
// Those both have the type signature:
f is (int, int, int) -> int
// Currying of functions is not allowed, because it complicates typechecking and makes code harder to read. Instead, you should define a new function with constants for some parameters.
// Variables have a base type, which indicates their actual representation in a computer, and can also have tags, which indicate what overloaded functions can or should be used for them.
// For example, a string is a byte array that's tagged string. Here's the type definition for strings:
type string = byte[] +string .
// Type definitions can include a list of tags to give the base type.
// Type annotations can use +required_tag to require a tag, -banned_tag to exclude banned_tag, and (+tag1 | +tag2) to work with either tag1 OR tag2.
// const is a special type tag that disallows mutation.
x = 10 is const . // the value of this x cannot be modified