Compare commits
12 Commits
3a3bf2c674
...
compiler
| Author | SHA1 | Date | |
|---|---|---|---|
|
8d301a6fc2
|
|||
|
81dfc07867
|
|||
|
bd6acf89e0
|
|||
|
36ef8f2a22
|
|||
|
3a7f3971ba
|
|||
|
b5b0a44400
|
|||
|
b2e3f5703b
|
|||
|
2d038279f2
|
|||
|
fae7bd8077
|
|||
|
5e91f6e8fa
|
|||
|
7cdd4ee759
|
|||
|
a250d96c63
|
21
.woodpecker/publish.yaml
Normal file
21
.woodpecker/publish.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
when:
|
||||
event: [push, cron, pull_request, manual]
|
||||
|
||||
steps:
|
||||
- name: Build Nightly Artifact
|
||||
image: ocaml/opam:debian-11-ocaml-5.4
|
||||
commands:
|
||||
- opam install . --deps-only
|
||||
- opam exec -- dune build
|
||||
- mkdir -p dist
|
||||
- opam exec -- dune install --prefix=$(pwd)/dist
|
||||
|
||||
- tar czvf ollisp-nightly-amd64.tar.gz -C dist .
|
||||
- name: Publish to Gitea
|
||||
image: curlimages/curl
|
||||
environment:
|
||||
GITEA_TOKEN:
|
||||
from_secret: package_token
|
||||
commands:
|
||||
# Upload the nightly tarball to the forge's generic package registry.
# --fail makes curl exit non-zero on an HTTP error response, so a rejected upload fails this step instead of passing silently.
# The URL is quoted so the shell never treats the `?` in the query string as a glob pattern.
# NOTE(review): the package path segment is `olisp` while the artifact and opam package are `ollisp` - confirm this is intended.
# NOTE(review): `-v` may print the request's Authorization header into the CI log - confirm this is acceptable.
- curl -v --fail --user "$CI_REPO_OWNER:$GITEA_TOKEN" --upload-file ollisp-nightly-amd64.tar.gz "$CI_FORGE_URL/api/packages/$CI_REPO_OWNER/generic/olisp/nightly/ollisp-nightly-amd64.tar.gz?duplicate_upgrade=true"
|
||||
|
||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2026 Emin Arslan
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
210
doc/env.md
Normal file
210
doc/env.md
Normal file
@@ -0,0 +1,210 @@
|
||||
This document holds my design notes for lexical and global environments
|
||||
for this compiler. I have not yet named the language.
|
||||
|
||||
# Closures
|
||||
|
||||
The environment system implements flat closures.
|
||||
When a closure is created at runtime, all free variables
|
||||
it uses are packaged as part of the function object, then the function
|
||||
body uses a GetFree instruction to get those free variables by an index.
|
||||
|
||||
(Free variables are propagated from inner closures outwards. This is necessary,
|
||||
as this also handles multiple-argument functions gracefully.)
|
||||
|
||||
```scheme
|
||||
(let ((a 10))
|
||||
(print (+ a 5)))
|
||||
```
|
||||
|
||||
This code will be compiled as a lambda that takes a single parameter and executes
|
||||
the body `(print (+ a 5))`, which is called immediately with the value 10.
|
||||
|
||||
The compiler tries to perform symbol resolution on expressions in the body of the
|
||||
let as well, however it sees no other expressions creating further scopes.
|
||||
|
||||
Since there are two free symbols in this code (`+` and `print`), and the surrounding
|
||||
environment does not have these two symbols defined locally, both of these symbols
|
||||
will be resolved to their global definitions directly.
|
||||
|
||||
Now let's examine a classic example of closures:
|
||||
|
||||
```scheme
|
||||
(define (adder x)
|
||||
(lambda (y) (+ x y)))
|
||||
```
|
||||
|
||||
The adder function takes an argument x, and returns a function that adds x
|
||||
to its argument.
|
||||
|
||||
This is implemented by a compiler pass that resolves symbols. Starting from top-level
|
||||
expressions, it scans downwards, noting every free symbol. A free symbol is one
|
||||
that is used in an expression, yet has no value defined locally in that expression.
|
||||
In other words, its value must come from the surrounding scope.
|
||||
|
||||
In this example, the adder function has a symbol x that is a part of its function definition.
|
||||
This is clearly not a free variable. However, examining the inner lambda expression,
|
||||
we can see that it uses y (which is not free) and x. The value of x is not defined
|
||||
as part of the lambda expression, so it must be free.
|
||||
|
||||
The compiler, seeing this, notes that the inner lambda has a free variable `x`, and a parameter
|
||||
`y`. Thus, the lambda has 1 free variable and 1 parameter. This means the closure object will have
|
||||
a code pointer along with an array of length 1 forming the storage for the free variable(s).
|
||||
The compiler compiles the body of the lambda such that every occurrence of `x` is replaced
|
||||
with code to get free variable #0 from the current closure. (`y` is, naturally, parameter #0).
|
||||
Otherwise, no special handling is necessary.
|
||||
|
||||
The inner lambda has no other expressions creating further scopes, so the compiler
|
||||
knows it has hit the deepest scope in the expression, and starts scanning outwards once again.
|
||||
|
||||
Scanning outwards, the compiler sees that there is a defined symbol x, and in the scope
|
||||
of this definition, a lambda expression that uses a free symbol named x is used. The
|
||||
compiler matches these, and compiles the lambda expression (as in, the value that the lambda
|
||||
expression will evaluate to) such that it creates a closure object: a pair of a code pointer
|
||||
pointing to the already compiled body, and an array of length 1 containing the current
|
||||
value of x.
|
||||
|
||||
This newly created value represents the closure. As you might notice, the current value
|
||||
of x has been copied into the closure object. The closure is now returned, and the
|
||||
scope of `adder` is destroyed. The closure object survives.
|
||||
|
||||
Note: in actuality, the outer `adder` function itself is also a closure. The inner
|
||||
lambda actually has *two* free variables: `+` is also a symbol, and its value is not
|
||||
defined in the body of the lambda. Since `adder` also doesn't define it, the free symbol
|
||||
is propagated outwards, and adder also accesses it as a free variable. The compiler
|
||||
(when propagating free symbols) eventually reaches the global environment, and
|
||||
resolves these free symbols to their global definitions.
|
||||
|
||||
All global symbols are late-bound. Once the free symbol is propagated outwards to the global
|
||||
definition, the compiler must notice this and insert an instruction to get the
|
||||
value of a global symbol.
|
||||
|
||||
Thus, the following will raise an error at runtime:
|
||||
|
||||
```
|
||||
(define (adder x)
|
||||
(lambda (y) (+ x y)))
|
||||
(set! '+ 5)
|
||||
; + now equals 5.
|
||||
(adder 5 5)
|
||||
```
|
||||
|
||||
Since `5` is not a function, it cannot be called, and this will raise an error.
|
||||
|
||||
## Note on boxing
|
||||
|
||||
Closure conversion makes some situations a bit tricky.
|
||||
|
||||
```
|
||||
(let ((x 10))
|
||||
(let ((f (lambda () x))) ;; f captures x
|
||||
(set! x 20) ;; we change local x
|
||||
(f))) ;; does this return 10 or 20?
|
||||
```
|
||||
|
||||
In this case, instead of x being copied directly into the closure, a
|
||||
reference to its value is copied into the closure. This is usual in
|
||||
most schemes and lisps.
|
||||
|
||||
In fact, you can even treat these as mutable state:
|
||||
|
||||
```
|
||||
(define (make-counter)
|
||||
(let ((count 0))
|
||||
(lambda ()
|
||||
(set! count (+ count 1))
|
||||
count)))
|
||||
```
|
||||
|
||||
So a closure can capture not just the value of a symbol, but also a
|
||||
reference to it. This reference survives the end of the `make-counter`
|
||||
function.
|
||||
|
||||
## Note on currying
|
||||
|
||||
Because this language is actually a curried variant of lisp/scheme, the
|
||||
above function could also be written like this:
|
||||
|
||||
```scheme
|
||||
(define (adder x y) (+ x y))
|
||||
```
|
||||
|
||||
or, even like this:
|
||||
|
||||
```scheme
|
||||
(define adder +)
|
||||
```
|
||||
|
||||
... since the built-in `+` function is also already curried. In fact, the entire
|
||||
language is curried. All function calls are (or behave as if they were) unary.
|
||||
The function call syntax `(f x y)` is actually treated as `((f x) y)` by the
|
||||
compiler.
|
||||
|
||||
## Note on syntax
|
||||
|
||||
I am using more or less regular Scheme syntax in this document. However, this is
|
||||
potentially subject to change. I have not decided on what the official syntax
|
||||
should be like. I am using Scheme syntax simply because I think it is fairly clean,
|
||||
but some changes might make sense in the future as the semantics of this language
|
||||
deviate greatly from Scheme's.
|
||||
|
||||
## Note on performance
|
||||
|
||||
This design document may raise concerns of performance. If everything above is
|
||||
truly set in stone, then it seems obvious that there should be a performance
|
||||
penalty.
|
||||
|
||||
As written, this design requires a basic addition like `(+ 1 2)` to allocate a
|
||||
closure object after all. No matter how fast OCaml's minor heap may be
|
||||
(and it is plenty fast, to be fair), that is not going to go well in a tight loop.
|
||||
|
||||
These are valid concerns, and I am currently leaving these problems to my future
|
||||
self.
|
||||
|
||||
Optimizing multiple-argument functions is actually fairly straightforward (or
|
||||
it looks easy, at least), however I want to first make sure the language
|
||||
has consistent semantics. A slow language is better than no language, after all.
|
||||
So I intend to add the facilities necessary for these optimizations into the
|
||||
compiler at a later point.
|
||||
|
||||
## Global Definitions
|
||||
|
||||
Global definitions get a separate section because they're mostly straightforward.
|
||||
|
||||
Any symbol defined through a top-level `define` form is made globally available
|
||||
after the definition form. More accurately, the symbol is present in the program
|
||||
before the define is reached, however it will be bound to a dummy value until
|
||||
the `define` form is executed.
|
||||
|
||||
This behaviour is proposed for the purpose of allowing mutually
|
||||
recursive definitions without issue, however please note that this is not yet certain,
|
||||
because this design comes with the tradeoff that errors involving symbols accessed
|
||||
before the point they are supposed to be defined can only be detected at runtime.
|
||||
|
||||
To illustrate the problems this could cause:
|
||||
|
||||
```
|
||||
(define b (+ a 10))
|
||||
(define a 5)
|
||||
```
|
||||
|
||||
This is pretty clearly an error - yet the compiler cannot, as proposed, determine
|
||||
this. In the future, further passes over the source code could be added to scan
|
||||
for such issues, or a differentiator between top-level function and variable
|
||||
definitions could be introduced to prevent this.
|
||||
|
||||
Notably, this problem does not occur for function definitions. In fact, the following
|
||||
is perfectly fine despite looking a bit similar:
|
||||
|
||||
```
|
||||
(define (b) (+ a 10))
|
||||
(define a 5)
|
||||
```
|
||||
|
||||
Generally, any symbol appearing in the body of a function will only be compiled
|
||||
to access that symbol. The symbol is only accessed once the function is called.
|
||||
Thus, you can create mutually recursive functions at the top level with no issue.
|
||||
|
||||
The body of the definition is only executed once the `define` form is reached.
|
||||
Thus, definitions with side effects will execute exactly in the order they
|
||||
appear in the source.
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
(lang dune 3.7)
|
||||
(using menhir 2.1)
|
||||
(generate_opam_files true)
|
||||
|
||||
(package
|
||||
(name ollisp))
|
||||
(name ollisp)
|
||||
(depends menhir))
|
||||
|
||||
@@ -16,7 +16,6 @@ type expression =
|
||||
| Var of string
|
||||
| Apply of expression * expression
|
||||
| Lambda of string * expression
|
||||
(*| LetRec of (string * expression) list * expression *)
|
||||
| If of expression * expression * expression
|
||||
| Set of string * expression
|
||||
| Begin of expression list
|
||||
|
||||
104
lib/compiler/scope_analysis.ml
Normal file
104
lib/compiler/scope_analysis.ml
Normal file
@@ -0,0 +1,104 @@
|
||||
|
||||
|
||||
module SymbolTable = Map.Make(String);;
|
||||
|
||||
let ( let* ) = Result.bind
|
||||
let traverse = Util.traverse
|
||||
|
||||
(* literals are not modified. *)
|
||||
type literal = Core_ast.literal
|
||||
|
||||
(* Note:
|
||||
all symbol accesses are replaced with either a local or global access.
|
||||
Local accesses a symbol in the local scope.
|
||||
Global accesses a symbol in the global scope.
|
||||
|
||||
Lambda expressions are stripped of the symbol name of their single parameter.
|
||||
This name is not needed at runtime, as all symbol accesses will be resolved
|
||||
into an index into either the local scope linked list or the global symbol table.
|
||||
|
||||
Set is also split into its global and local versions, just like Var.
|
||||
|
||||
The rest aren't modified at all.
|
||||
*)
|
||||
(* Scope-resolved expression tree produced by [analyze].

   - [Literal]: a [Core_ast.literal], carried over unchanged.
   - [Local i]: the variable found at position [i] of the lexical scope
     list; position 0 is the most recently bound lambda parameter.
   - [Global i]: the variable whose id in the global symbol table is [i].
   - [Apply (f, arg)]: application of [f] to the single argument [arg].
   - [Lambda body]: a one-parameter function; only the analyzed body is
     kept, the parameter name is dropped.
   - [If (test, pos, neg)]: conditional with both branches.
   - [SetLocal (i, e)] / [SetGlobal (i, e)]: assignment of [e] to the
     local at position [i] / the global with id [i].
   - [Begin es]: a list of expressions. *)
type expression =
|
||||
| Literal of literal
|
||||
| Local of int
|
||||
| Global of int
|
||||
| Apply of expression * expression
|
||||
| Lambda of expression
|
||||
| If of expression * expression * expression
|
||||
| SetLocal of int * expression
|
||||
| SetGlobal of int * expression
|
||||
| Begin of expression list
|
||||
|
||||
|
||||
(* extract all defined global symbols, given the top-level expressions
|
||||
and definitions of a program
|
||||
|
||||
The returned table maps symbol names to unique integers, representing
|
||||
an index into a global array where the values of all global symbols will
|
||||
be kept at runtime.
|
||||
*)
|
||||
(* [extract_globals top] builds the global symbol table for a program.

   Every [Core_ast.Define] among the top-level forms contributes its symbol;
   plain top-level expressions contribute nothing. Ids are handed out in
   order of first definition, starting at 0, and are dense: a table with [n]
   entries uses exactly the ids [0 .. n - 1].

   A symbol that is defined more than once keeps the id of its first
   definition. Handing out a fresh id on every redefinition would orphan the
   earlier id and let ids grow past the number of entries in the table. *)
let extract_globals (top : Core_ast.top_level list) =
  (* The accumulator pairs the next unused id with the table built so far. *)
  let register (next_id, tbl) = function
    | Core_ast.Define (sym, _) when not (SymbolTable.mem sym tbl) ->
      (next_id + 1, SymbolTable.add sym next_id tbl)
    | Core_ast.Define _ | Core_ast.Expr _ -> (next_id, tbl)
  in
  let _, tbl = List.fold_left register (0, SymbolTable.empty) top in
  tbl
|
||||
|
||||
(* The current lexical scope is simply a linked list of entries,
|
||||
and each symbol access will be resolved as an access to an index
|
||||
in this linked list. The symbol names are erased before runtime.
|
||||
During this analysis we keep the lexical scope as a linked list of
|
||||
symbols, and we find the index by traversing this linked list.
|
||||
*)
|
||||
|
||||
(* [resolve_global tbl sym] looks [sym] up in the global symbol table [tbl].
   Returns [Ok (Global id)] when the symbol has an entry, and an [Error]
   carrying a "not defined" message otherwise. *)
let resolve_global tbl sym =
  SymbolTable.find_opt sym tbl
  |> Option.fold
       ~none:(Error ("symbol " ^ sym ^ " is not defined!"))
       ~some:(fun id -> Ok (Global id))
|
||||
|
||||
(* [resolve_lexical tbl env sym] searches the lexical scope [env] (a list of
   symbol names) for [sym], front to back. The first match at position [n]
   yields [Ok (Local n)]. When the scope is exhausted without a match, the
   lookup falls through to [resolve_global tbl sym]. *)
let resolve_lexical tbl env sym =
  let rec search position scope =
    match scope with
    | [] -> resolve_global tbl sym
    | name :: outer ->
      if String.equal name sym then Ok (Local position)
      else search (position + 1) outer
  in
  search 0 env
|
||||
|
||||
(* [resolve_symbol tbl env sym] resolves a symbol reference. It simply
   delegates to [resolve_lexical] with the same arguments. *)
let resolve_symbol tbl env sym = sym |> resolve_lexical tbl env
|
||||
|
||||
(* [resolve_set tbl env sym expr] builds the assignment node for [sym].
   [expr] is stored as given. The target is resolved with [resolve_symbol];
   a local target gives [SetLocal], a global target gives [SetGlobal].
   A resolution failure is returned unchanged, and any other resolution
   result is reported as an error. *)
let resolve_set tbl env sym expr =
  Result.bind (resolve_symbol tbl env sym) (function
    | Local slot -> Ok (SetLocal (slot, expr))
    | Global slot -> Ok (SetGlobal (slot, expr))
    | _ -> Error "resolve_set: symbol resolution returned something invalid.")
|
||||
|
||||
(* [analyze tbl current expr] converts a [Core_ast] expression into the
   scope-resolved [expression] type.

   [tbl] is the global symbol table and [current] is the lexical scope as a
   list of symbol names. Sub-expressions are analyzed left to right and the
   first [Error] encountered is returned. *)
let rec analyze tbl current expr =
  (* Analysis of a sub-expression that lives in the same scope as [expr]. *)
  let in_scope = analyze tbl current in
  match expr with
  | Core_ast.Literal lit -> Ok (Literal lit)
  | Core_ast.Var name -> resolve_symbol tbl current name
  | Core_ast.Set (name, value) ->
    (* The assigned value is analyzed before the target is resolved. *)
    Result.bind (in_scope value) (resolve_set tbl current name)
  | Core_ast.Lambda (param, body) ->
    (* The parameter is pushed onto the front of the scope for the body. *)
    analyze tbl (param :: current) body
    |> Result.map (fun resolved -> Lambda resolved)
  | Core_ast.Apply (callee, argument) ->
    let* callee = in_scope callee in
    let* argument = in_scope argument in
    Ok (Apply (callee, argument))
  | Core_ast.If (condition, consequent, alternative) ->
    let* condition = in_scope condition in
    let* consequent = in_scope consequent in
    let* alternative = in_scope alternative in
    Ok (If (condition, consequent, alternative))
  | Core_ast.Begin forms ->
    traverse in_scope forms |> Result.map (fun resolved -> Begin resolved)
|
||||
@@ -34,13 +34,7 @@ type top_level =
|
||||
|
||||
(* we use result here to make things nicer *)
|
||||
let ( let* ) = Result.bind
|
||||
let traverse f l =
|
||||
let rec aux acc = function
|
||||
| x :: xs ->
|
||||
let* result = f x in
|
||||
aux (result :: acc) xs
|
||||
| [] -> Ok (List.rev acc) in
|
||||
aux [] l
|
||||
let traverse = Util.traverse
|
||||
let map = List.map
|
||||
|
||||
|
||||
|
||||
9
lib/compiler/util.ml
Normal file
9
lib/compiler/util.ml
Normal file
@@ -0,0 +1,9 @@
|
||||
(* Binding operator for [result]: [let* x = r in body] continues with [body]
   when [r] is [Ok x] and stops with the error otherwise. *)
let ( let* ) = Result.bind

(* [traverse f l] applies [f] to the elements of [l] from left to right.
   Returns [Ok] with the results in the original order when every call
   succeeds. On the first [Error], returns that error and does not call [f]
   on the remaining elements. *)
let traverse f l =
  let rec walk collected remaining =
    match remaining with
    | [] -> Ok (List.rev collected)
    | item :: tail -> (
        match f item with
        | Ok value -> walk (value :: collected) tail
        | Error reason -> Error reason)
  in
  walk [] l
|
||||
21
ollisp.opam
Normal file
21
ollisp.opam
Normal file
@@ -0,0 +1,21 @@
|
||||
# This file is generated by dune, edit dune-project instead
|
||||
opam-version: "2.0"
|
||||
depends: [
|
||||
"dune" {>= "3.7"}
|
||||
"menhir"
|
||||
"odoc" {with-doc}
|
||||
]
|
||||
build: [
|
||||
["dune" "subst"] {dev}
|
||||
[
|
||||
"dune"
|
||||
"build"
|
||||
"-p"
|
||||
name
|
||||
"-j"
|
||||
jobs
|
||||
"@install"
|
||||
"@runtest" {with-test}
|
||||
"@doc" {with-doc}
|
||||
]
|
||||
]
|
||||
Reference in New Issue
Block a user