Compare commits
12 Commits
3a3bf2c674
...
compiler
| Author | SHA1 | Date | |
|---|---|---|---|
|
8d301a6fc2
|
|||
|
81dfc07867
|
|||
|
bd6acf89e0
|
|||
|
36ef8f2a22
|
|||
|
3a7f3971ba
|
|||
|
b5b0a44400
|
|||
|
b2e3f5703b
|
|||
|
2d038279f2
|
|||
|
fae7bd8077
|
|||
|
5e91f6e8fa
|
|||
|
7cdd4ee759
|
|||
|
a250d96c63
|
21
.woodpecker/publish.yaml
Normal file
21
.woodpecker/publish.yaml
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
when:
|
||||||
|
event: [push, cron, pull_request, manual]
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Build Nightly Artifact
|
||||||
|
image: ocaml/opam:debian-11-ocaml-5.4
|
||||||
|
commands:
|
||||||
|
- opam install . --deps-only
|
||||||
|
- opam exec -- dune build
|
||||||
|
- mkdir -p dist
|
||||||
|
- opam exec -- dune install --prefix=$(pwd)/dist
|
||||||
|
|
||||||
|
- tar czvf ollisp-nightly-amd64.tar.gz -C dist .
|
||||||
|
- name: Publish to Gitea
|
||||||
|
image: curlimages/curl
|
||||||
|
environment:
|
||||||
|
GITEA_TOKEN:
|
||||||
|
from_secret: package_token
|
||||||
|
commands:
|
||||||
|
- curl -v --user "$CI_REPO_OWNER:$GITEA_TOKEN" --upload-file ollisp-nightly-amd64.tar.gz $CI_FORGE_URL/api/packages/$CI_REPO_OWNER/generic/olisp/nightly/ollisp-nightly-amd64.tar.gz?duplicate_upgrade=true
|
||||||
|
|
||||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2026 Emin Arslan
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
210
doc/env.md
Normal file
210
doc/env.md
Normal file
@@ -0,0 +1,210 @@
|
|||||||
|
This document holds my design notes for lexical and global environments
|
||||||
|
for this compiler. I have not yet named the language.
|
||||||
|
|
||||||
|
# Closures
|
||||||
|
|
||||||
|
The environment system implements flat closures.
|
||||||
|
When a closure is created at runtime, all free variables
|
||||||
|
it uses are packaged as part of the function object, then the function
|
||||||
|
body uses a GetFree instruction to get those free variables by an index.
|
||||||
|
|
||||||
|
(Free variables are propagated from inner closures outwards. This is necessary,
|
||||||
|
as this also handles multiple-argument functions gracefully.)
|
||||||
|
|
||||||
|
```scheme
|
||||||
|
(let ((a 10))
|
||||||
|
(print (+ a 5)))
|
||||||
|
```
|
||||||
|
|
||||||
|
This code will be compiled as a lambda that takes a single parameter and executes
|
||||||
|
the body `(print (+ a 5))`, which is called immediately with the value 10.
|
||||||
|
|
||||||
|
The compiler tries to perform symbol resolution on expressions in the body of the
|
||||||
|
let as well, however it sees no other expressions creating further scopes.
|
||||||
|
|
||||||
|
Since there are two free symbols in this code (`+` and `print`), and the surrounding
|
||||||
|
environment does not have these two symbols defined locally, both of these symbols
|
||||||
|
will be resolved to their global definitions directly.
|
||||||
|
|
||||||
|
Now let's examine a classic example of closures:
|
||||||
|
|
||||||
|
```scheme
|
||||||
|
(define (adder x)
|
||||||
|
(lambda (y) (+ x y)))
|
||||||
|
```
|
||||||
|
|
||||||
|
The adder function takes an argument x, and creates and returns a function that adds x
|
||||||
|
to its argument.
|
||||||
|
|
||||||
|
This is implemented by a compiler pass that resolves symbols. Starting from top-level
|
||||||
|
expressions, it scans downwards, noting every free symbol. A free symbol is one
|
||||||
|
that is used in an expression, yet has no value defined locally in that expression.
|
||||||
|
In other words, its value must come from the surrounding scope.
|
||||||
|
|
||||||
|
In this example, the adder function has a symbol x that is a part of its function definition.
|
||||||
|
This is clearly not a free variable. However, examining the inner lambda expression,
|
||||||
|
we can see that it uses y (which is not free) and x. The value of x is not defined
|
||||||
|
as part of the lambda expression, so it must be free.
|
||||||
|
|
||||||
|
The compiler, seeing this, notes that the inner lambda has a free variable `x`, and a parameter
|
||||||
|
`y`. Thus, the lambda has 1 free variable and 1 parameter. This means the closure object will have
|
||||||
|
a code pointer along with an array of length 1 forming the storage for the free variable(s).
|
||||||
|
The compiler compiles the body of the lambda such that every occurrence of `x` is replaced
|
||||||
|
with code to get free variable #0 from the current closure. (`y` is, naturally, parameter #0).
|
||||||
|
Otherwise, no special handling is necessary.
|
||||||
|
|
||||||
|
The inner lambda has no other expressions creating further scopes, so the compiler
|
||||||
|
knows it has hit the deepest scope in the expression, and starts scanning outwards once again.
|
||||||
|
|
||||||
|
Scanning outwards, the compiler sees that there is a defined symbol x, and in the scope
|
||||||
|
of this definition, a lambda expression that uses a free symbol named x is used. The
|
||||||
|
compiler matches these, and compiles the lambda expression (as in, the value that the lambda
|
||||||
|
expression will evaluate to) such that it creates a closure object: a pair of a code pointer
|
||||||
|
pointing to the already compiled body, and an array of length 1 containing the current
|
||||||
|
value of x.
|
||||||
|
|
||||||
|
This newly created value represents the closure. As you might notice, the current value
|
||||||
|
of x has been copied into the closure object. The closure is now returned, and the
|
||||||
|
scope of `adder` is destroyed. The closure object survives.
|
||||||
|
|
||||||
|
Note: in actuality, the outer `adder` function itself is also a closure. The inner
|
||||||
|
lambda actually has *two* free variables: `+` is also a symbol, and its value is not
|
||||||
|
defined in the body of the lambda. Since `adder` also doesn't define it, the free symbol
|
||||||
|
is propagated outwards, and adder also accesses it as a free variable. The compiler
|
||||||
|
(when propagating free symbols) eventually reaches the global environment, and
|
||||||
|
resolves these free symbols to their global definitions.
|
||||||
|
|
||||||
|
All global symbols are late-bound. Once the free symbol is propagated outwards to the global
|
||||||
|
definition, the compiler must notice this and insert an instruction to get the
|
||||||
|
value of a global symbol.
|
||||||
|
|
||||||
|
Thus, the following will raise an error at runtime:
|
||||||
|
|
||||||
|
```
|
||||||
|
(define (adder x)
|
||||||
|
(lambda (y) (+ x y)))
|
||||||
|
(set! + 5)
|
||||||
|
; + now equals 5.
|
||||||
|
(adder 5 5)
|
||||||
|
```
|
||||||
|
|
||||||
|
Since `5` is not a function, it cannot be called, and this will raise an error.
|
||||||
|
|
||||||
|
## Note on boxing
|
||||||
|
|
||||||
|
Closure conversion makes some situations a bit tricky.
|
||||||
|
|
||||||
|
```
|
||||||
|
(let ((x 10))
|
||||||
|
(let ((f (lambda () x))) ;; f captures x
|
||||||
|
(set! x 20) ;; we change local x
|
||||||
|
(f))) ;; does this return 10 or 20?
|
||||||
|
```
|
||||||
|
|
||||||
|
In this case, instead of x being copied directly into the closure, a
|
||||||
|
reference to its value is copied into the closure. This is usual in
|
||||||
|
most schemes and lisps.
|
||||||
|
|
||||||
|
In fact, you can even treat these as mutable state:
|
||||||
|
|
||||||
|
```
|
||||||
|
(define (make-counter)
|
||||||
|
(let ((count 0))
|
||||||
|
(lambda ()
|
||||||
|
(set! count (+ count 1))
|
||||||
|
count)))
|
||||||
|
```
|
||||||
|
|
||||||
|
So a closure can capture not just the value of a symbol, but also a
|
||||||
|
reference to it. This reference survives the end of the `make-counter`
|
||||||
|
function.
|
||||||
|
|
||||||
|
## Note on currying
|
||||||
|
|
||||||
|
Because this language is actually a curried variant of lisp/scheme, the
|
||||||
|
above function could also be written like this:
|
||||||
|
|
||||||
|
```scheme
|
||||||
|
(define (adder x y) (+ x y))
|
||||||
|
```
|
||||||
|
|
||||||
|
or, even like this:
|
||||||
|
|
||||||
|
```scheme
|
||||||
|
(define adder +)
|
||||||
|
```
|
||||||
|
|
||||||
|
... since the built-in `+` function is also already curried. In fact, the entire
|
||||||
|
language is curried. All function calls are (or behave as if they were) unary.
|
||||||
|
The function call syntax `(f x y)` is actually treated as `((f x) y)` by the
|
||||||
|
compiler.
|
||||||
|
|
||||||
|
## Note on syntax
|
||||||
|
|
||||||
|
I am using more or less regular Scheme syntax in this document. However, this is
|
||||||
|
potentially subject to change. I have not decided on what the official syntax
|
||||||
|
should be like. I am using Scheme syntax simply because I think it is fairly clean,
|
||||||
|
but some changes might make sense in the future as the semantics of this language
|
||||||
|
deviate greatly from Scheme's.
|
||||||
|
|
||||||
|
## Note on performance
|
||||||
|
|
||||||
|
This design document may raise concerns of performance. If everything above is
|
||||||
|
truly set in stone, then it seems obvious that there should be a performance
|
||||||
|
penalty.
|
||||||
|
|
||||||
|
As written, this design requires a basic addition like `(+ 1 2)` to allocate a
|
||||||
|
closure object after all. No matter how fast OCaml's minor heap may be
|
||||||
|
(and it is plenty fast, to be fair), that is not going to go well in a tight loop.
|
||||||
|
|
||||||
|
These are valid concerns, and I am currently leaving these problems to my future
|
||||||
|
self.
|
||||||
|
|
||||||
|
Optimizing multiple-argument functions is actually fairly straightforward (or
|
||||||
|
it looks easy, at least), however I want to first make sure the language
|
||||||
|
has consistent semantics. A slow language is better than no language, after all.
|
||||||
|
So I intend to add the facilities necessary for these optimizations into the
|
||||||
|
compiler at a later point.
|
||||||
|
|
||||||
|
## Global Definitions
|
||||||
|
|
||||||
|
Global definitions get a separate section because they're mostly straightforward.
|
||||||
|
|
||||||
|
Any symbol defined through a top-level `define` form is made globally available
|
||||||
|
after the definition form. More accurately, the symbol is present in the program
|
||||||
|
before the define is reached, however it will be bound to a dummy value until
|
||||||
|
the `define` form is executed.
|
||||||
|
|
||||||
|
This behaviour is proposed for the purpose of allowing mutually
|
||||||
|
recursive definitions without issue, however please note that this is not yet certain,
|
||||||
|
because this design comes with the tradeoff that errors involving symbols accessed
|
||||||
|
before the point they are supposed to be defined can only be detected at runtime.
|
||||||
|
|
||||||
|
To illustrate the problems this could cause:
|
||||||
|
|
||||||
|
```
|
||||||
|
(define b (+ a 10))
|
||||||
|
(define a 5)
|
||||||
|
```
|
||||||
|
|
||||||
|
This is pretty clearly an error - yet the compiler cannot, as proposed, determine
|
||||||
|
this. In the future, further passes over the source code could be added to scan
|
||||||
|
for such issues, or a differentiator between top-level function and variable
|
||||||
|
definitions could be introduced to prevent this.
|
||||||
|
|
||||||
|
Notably, this problem does not occur for function definitions. In fact, the following
|
||||||
|
is perfectly fine despite looking a bit similar:
|
||||||
|
|
||||||
|
```
|
||||||
|
(define (b) (+ a 10))
|
||||||
|
(define a 5)
|
||||||
|
```
|
||||||
|
|
||||||
|
Generally, any symbol appearing in the body of a function will only be compiled
|
||||||
|
to access that symbol. The symbol is only accessed once the function is called.
|
||||||
|
Thus, you can create mutually recursive functions at the top level with no issue.
|
||||||
|
|
||||||
|
The body of the definition is only executed once the `define` form is reached.
|
||||||
|
Thus, definitions with side effects will execute exactly in the order they
|
||||||
|
appear in the source.
|
||||||
|
|
||||||
@@ -1,5 +1,7 @@
|
|||||||
(lang dune 3.7)
|
(lang dune 3.7)
|
||||||
(using menhir 2.1)
|
(using menhir 2.1)
|
||||||
|
(generate_opam_files true)
|
||||||
|
|
||||||
(package
|
(package
|
||||||
(name ollisp))
|
(name ollisp)
|
||||||
|
(depends menhir))
|
||||||
|
|||||||
@@ -16,7 +16,6 @@ type expression =
|
|||||||
| Var of string
|
| Var of string
|
||||||
| Apply of expression * expression
|
| Apply of expression * expression
|
||||||
| Lambda of string * expression
|
| Lambda of string * expression
|
||||||
(*| LetRec of (string * expression) list * expression *)
|
|
||||||
| If of expression * expression * expression
|
| If of expression * expression * expression
|
||||||
| Set of string * expression
|
| Set of string * expression
|
||||||
| Begin of expression list
|
| Begin of expression list
|
||||||
|
|||||||
104
lib/compiler/scope_analysis.ml
Normal file
104
lib/compiler/scope_analysis.ml
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
|
||||||
|
|
||||||
|
module SymbolTable = Map.Make(String);;
|
||||||
|
|
||||||
|
let ( let* ) = Result.bind
|
||||||
|
let traverse = Util.traverse
|
||||||
|
|
||||||
|
(* literals are not modified. *)
|
||||||
|
type literal = Core_ast.literal
|
||||||
|
|
||||||
|
(* Note:
|
||||||
|
all symbol accesses are replaced with either a local or global access.
|
||||||
|
Local accesses a symbol in the local scope.
|
||||||
|
Global accesses a symbol in the global scope.
|
||||||
|
|
||||||
|
Lambda expressions are stripped of the symbol name of their single parameter.
|
||||||
|
This name is not needed at runtime, as all symbol accesses will be resolved
|
||||||
|
into an index into either the local scope linked list or the global symbol table.
|
||||||
|
|
||||||
|
Set is also split into its global and local versions, just like Var.
|
||||||
|
|
||||||
|
The rest aren't modified at all.
|
||||||
|
*)
|
||||||
|
type expression =
|
||||||
|
| Literal of literal
|
||||||
|
| Local of int
|
||||||
|
| Global of int
|
||||||
|
| Apply of expression * expression
|
||||||
|
| Lambda of expression
|
||||||
|
| If of expression * expression * expression
|
||||||
|
| SetLocal of int * expression
|
||||||
|
| SetGlobal of int * expression
|
||||||
|
| Begin of expression list
|
||||||
|
|
||||||
|
|
||||||
|
(* extract all defined global symbols, given the top-level expressions
|
||||||
|
and definitions of a program
|
||||||
|
|
||||||
|
The returned table maps symbol names to unique integers, representing
|
||||||
|
an index into a global array where the values of all global symbols will
|
||||||
|
be kept at runtime.
|
||||||
|
*)
|
||||||
|
let extract_globals (top : Core_ast.top_level list) =
|
||||||
|
let id_counter = (ref (-1)) in
|
||||||
|
let id () =
|
||||||
|
id_counter := !id_counter + 1; !id_counter in
|
||||||
|
let rec aux tbl = function
|
||||||
|
| [] -> tbl
|
||||||
|
| Core_ast.Define (sym, _) :: rest ->
|
||||||
|
aux (SymbolTable.add sym (id ()) tbl) rest
|
||||||
|
| Expr _ :: rest ->
|
||||||
|
aux tbl rest
|
||||||
|
in aux SymbolTable.empty top
|
||||||
|
|
||||||
|
(* The current lexical scope is simply a linked list of entries,
|
||||||
|
and each symbol access will be resolved as an access to an index
|
||||||
|
in this linked list. The symbol names are erased before runtime.
|
||||||
|
During this analysis we keep the lexical scope as a linked list of
|
||||||
|
symbols, and we find the index by traversing this linked list.
|
||||||
|
*)
|
||||||
|
|
||||||
|
let resolve_global tbl sym =
|
||||||
|
match SymbolTable.find_opt sym tbl with
|
||||||
|
| Some x -> Ok (Global x)
|
||||||
|
| None -> Error ("symbol " ^ sym ^ " is not defined!")
|
||||||
|
|
||||||
|
let resolve_lexical tbl env sym =
|
||||||
|
let rec aux counter = function
|
||||||
|
| [] -> resolve_global tbl sym
|
||||||
|
| x :: _ when String.equal x sym -> Ok (Local counter)
|
||||||
|
| _ :: rest -> aux (counter + 1) rest
|
||||||
|
in aux 0 env
|
||||||
|
|
||||||
|
let resolve_symbol tbl env sym =
|
||||||
|
resolve_lexical tbl env sym
|
||||||
|
|
||||||
|
let resolve_set tbl env sym expr =
|
||||||
|
let* sym = resolve_symbol tbl env sym in
|
||||||
|
match sym with
|
||||||
|
| Local i -> Ok (SetLocal (i, expr))
|
||||||
|
| Global i -> Ok (SetGlobal (i, expr))
|
||||||
|
| _ -> Error "resolve_set: symbol resolution returned something invalid."
|
||||||
|
|
||||||
|
let rec analyze tbl current = function
|
||||||
|
| Core_ast.Literal s -> Ok (Literal s)
|
||||||
|
| Var sym -> resolve_symbol tbl current sym
|
||||||
|
| Set (sym, expr) ->
|
||||||
|
let* inner = analyze tbl current expr in
|
||||||
|
resolve_set tbl current sym inner
|
||||||
|
| Lambda (s, body) ->
|
||||||
|
let* body = (analyze tbl (s :: current) body) in
|
||||||
|
Ok (Lambda body)
|
||||||
|
| Apply (f, e) ->
|
||||||
|
let* f = analyze tbl current f in
|
||||||
|
let* e = analyze tbl current e in
|
||||||
|
Ok (Apply (f, e))
|
||||||
|
| If (test, pos, neg) ->
|
||||||
|
let* test = analyze tbl current test in
|
||||||
|
let* pos = analyze tbl current pos in
|
||||||
|
let* neg = analyze tbl current neg in
|
||||||
|
Ok (If (test, pos, neg))
|
||||||
|
| Begin el ->
|
||||||
|
let* body = traverse (analyze tbl current) el in
|
||||||
|
Ok (Begin body)
|
||||||
@@ -34,13 +34,7 @@ type top_level =
|
|||||||
|
|
||||||
(* we use result here to make things nicer *)
|
(* we use result here to make things nicer *)
|
||||||
let ( let* ) = Result.bind
|
let ( let* ) = Result.bind
|
||||||
let traverse f l =
|
let traverse = Util.traverse
|
||||||
let rec aux acc = function
|
|
||||||
| x :: xs ->
|
|
||||||
let* result = f x in
|
|
||||||
aux (result :: acc) xs
|
|
||||||
| [] -> Ok (List.rev acc) in
|
|
||||||
aux [] l
|
|
||||||
let map = List.map
|
let map = List.map
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
9
lib/compiler/util.ml
Normal file
9
lib/compiler/util.ml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
let ( let* ) = Result.bind
|
||||||
|
|
||||||
|
let traverse f l =
|
||||||
|
let rec aux acc = function
|
||||||
|
| x :: xs ->
|
||||||
|
let* result = f x in
|
||||||
|
aux (result :: acc) xs
|
||||||
|
| [] -> Ok (List.rev acc) in
|
||||||
|
aux [] l
|
||||||
21
ollisp.opam
Normal file
21
ollisp.opam
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
# This file is generated by dune, edit dune-project instead
|
||||||
|
opam-version: "2.0"
|
||||||
|
depends: [
|
||||||
|
"dune" {>= "3.7"}
|
||||||
|
"menhir"
|
||||||
|
"odoc" {with-doc}
|
||||||
|
]
|
||||||
|
build: [
|
||||||
|
["dune" "subst"] {dev}
|
||||||
|
[
|
||||||
|
"dune"
|
||||||
|
"build"
|
||||||
|
"-p"
|
||||||
|
name
|
||||||
|
"-j"
|
||||||
|
jobs
|
||||||
|
"@install"
|
||||||
|
"@runtest" {with-test}
|
||||||
|
"@doc" {with-doc}
|
||||||
|
]
|
||||||
|
]
|
||||||
Reference in New Issue
Block a user