Compare commits

..

13 Commits

Author SHA1 Message Date
haxala1r 37c8d2a62a Added some standard functions 2025-10-30 23:42:10 +03:00
haxala1r 45828a8dd4 reorganized a bit, separated bind-symbol into two operators that have different uses, def and set 2025-10-30 21:01:40 +03:00
Emin Arslan ccf1aec5da Removed irrelevant comment 2025-10-16 22:31:12 +03:00
Emin Arslan 6c3efde5e9 Changed the addition and subtraction functions to be clearer 2025-10-16 22:28:11 +03:00
Emin Arslan 8273baecf1 Added def, changed naming, and added if expressions 2025-10-16 17:15:04 +03:00
Emin Arslan fb52fb03b6 Evaluation is now performed properly, mimicking Common Lisp, and basic defun and defmacro definitions are provided (automatically executed on startup) 2025-10-14 22:26:00 +03:00
Emin Arslan 7105b2dd39 Added dot syntax for lists, and proper quote syntax. LQuoted is now unused 2025-10-14 22:24:57 +03:00
Emin Arslan be6e1cd684 Improved the repl to return to evaluation upon error. Also added an exit command 2025-10-14 21:06:11 +03:00
Emin Arslan b0ded579af Added builtin special forms, lambda forms and bind-symbol. got rid of bind-function, as it is now unnecessary. it is now possible to create functions! 2025-10-14 21:05:10 +03:00
Emin Arslan 22e7c3dbb3 Re-organized a lot of code, changed functions so that functions capture the surrounding environment and execute in that environment 2025-10-14 20:21:29 +03:00
Emin Arslan 965804c18d General style changes, nothing major 2025-10-14 19:01:29 +03:00
Emin Arslan a905ab2b42 Added bind-function primitive that allows us to define functions, also changed evaluation to allow for a persistent environment 2025-10-12 21:58:54 +03:00
Emin Arslan aa066f87d0 Initial state - basic lexer + parser + interpreter 2025-10-12 21:33:57 +03:00
39 changed files with 368 additions and 1630 deletions
-1
View File
@@ -1 +0,0 @@
use flake
-3
View File
@@ -1,4 +1 @@
_build _build
*~
.direnv
result
-12
View File
@@ -1,12 +0,0 @@
when:
event: [push, cron, pull_request, manual]
steps:
- name: Build on Debian
image: debian:13
pull: true
commands:
- apt update && apt full-upgrade -y
- apt install -y ocaml libfindlib-ocaml-dev ocaml-dune menhir
- dune build
- dune exec ollisp
-12
View File
@@ -1,12 +0,0 @@
when:
event: [push, cron, pull_request, manual]
steps:
- name: Build on Fedora
image: fedora:43
pull: true
commands:
- dnf update -y
- dnf install -y ocaml ocaml-findlib menhir dune
- dune build
- dune exec ollisp
-10
View File
@@ -1,10 +0,0 @@
when:
event: [push, cron, pull_request, manual]
steps:
- name: Build on NixOS
image: nixos/nix:latest
pull: true
commands:
- nix --extra-experimental-features nix-command --extra-experimental-features flakes build
- ./result/bin/ollisp
-21
View File
@@ -1,21 +0,0 @@
when:
event: [push, cron, pull_request, manual]
steps:
- name: Build Nightly Artifact
image: ocaml/opam:debian-11-ocaml-5.4
commands:
- opam install . --deps-only
- opam exec -- dune build
- mkdir -p dist
- opam exec -- dune install --prefix=$(pwd)/dist
- tar czvf ollisp-nightly-amd64.tar.gz -C dist .
- name: Publish to Gitea
image: curlimages/curl
environment:
GITEA_TOKEN:
from_secret: package_token
commands:
- curl -v --user "$CI_REPO_OWNER:$GITEA_TOKEN" --upload-file ollisp-nightly-amd64.tar.gz $CI_FORGE_URL/api/packages/$CI_REPO_OWNER/generic/olisp/nightly/ollisp-nightly-amd64.tar.gz?duplicate_upgrade=true
-21
View File
@@ -1,21 +0,0 @@
MIT License
Copyright (c) 2026 Emin Arslan
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
-10
View File
@@ -1,10 +0,0 @@
[![built with garnix](https://img.shields.io/endpoint.svg?url=https%3A%2F%2Fgarnix.io%2Fapi%2Fbadges%2Fhaxala1r%2Flisp)](https://garnix.io/repo/haxala1r/lisp)
## Lisp compiler written in OCaml
This project is a small lisp compiler written in OCaml.
Currently, a small lisp-like language is compiled into a custom bytecode format.
Documentation for the language, along with the bytecode format will be published
upon first release.
-17
View File
@@ -1,17 +0,0 @@
(* I don't have any built-in functions at all rn, so we just use a dummy function *)
let rec interpret_loop () =
let l = read_line () in
let vm = Compiler.Emit.compile_src l in
match vm with
| Ok vm ->
print_endline "=== PROGRAM DISASSEMBLY";
Vm.Types.print_instrs vm.instrs;
print_endline "=== PROGRAM OUTPUT";
Vm.interpret vm; interpret_loop ()
| Error s -> print_endline s
let _ = interpret_loop ()
+5 -3
View File
@@ -1,4 +1,6 @@
(executable (executable
(name comp) (name main)
(public_name ollisp) (public_name main)
(libraries str unix compiler vm)) (libraries str lisp unix))
(include_subdirs unqualified)
+29
View File
@@ -0,0 +1,29 @@
open Lisp.Ast;;
open Printf;;
open Lisp;;
open Eval;;
open Read;;
let rec repl env c =
let () = printf ">>> "; Out_channel.flush Out_channel.stdout; in
match In_channel.input_line c with
| None -> ()
| Some "exit" -> ()
| Some l ->
try
let vals = (parse_str l) in
(* dbg_print_all vals; *)
dbg_print_all (eval_all env vals);
Out_channel.flush Out_channel.stdout;
repl env c
with
| Invalid_argument s ->
printf "%s\nResuming repl\n" s;
repl env c
| Parser.Error ->
printf "Expression '%s' couldn't be parsed, try again\n" l;
repl env c
;;
let () = repl (make_env ()) (In_channel.stdin)
-9
View File
@@ -1,9 +0,0 @@
{ pkgs ? import <nixpkgs> {}, ...}:
pkgs.ocamlPackages.buildDunePackage {
pname = "ollisp";
version = "0.0.1";
src = pkgs.lib.cleanSource ./.;
nativeBuildInputs = with pkgs.ocamlPackages; [findlib menhir dune_3 ocaml];
buildInputs = with pkgs.ocamlPackages; [];
}
-210
View File
@@ -1,210 +0,0 @@
This document holds my design notes for lexical and global environments
for this compiler. I have not yet named the language.
# Closures
The environment system implements flat closures.
When a closure is created at runtime, all free variables
it uses are packaged as part of the function object, then the function
body uses a GetFree instruction to get those free variables by an index.
(Free variables are propagated from inner closures outwards. This is necessary,
as this also handles multiple-argument functions gracefully.)
```scheme
(let ((a 10))
(print (+ a 5)))
```
This code will be compiled as a lambda that takes a single parameter and executes
the body `(print (+ a 5))`, which is called immediately with the value 10.
The compiler tries to perform symbol resolution on expressions in the body of the
let as well, however it sees no other expressions creating further scopes.
Since there are two free symbols in this code (`+` and `print`), and the surrounding
environment does not have these two symbols defined locally, both of these symbols
will be resolved to their global definitions directly.
Now let's examine a classic example of closures:
```scheme
(define (adder x)
(lambda (y) (+ x y)))
```
The adder function takes an argument x, and creates returns a function that adds x
to its argument.
This is implemented by a compiler pass that resolves symbols. Starting from top-level
expressions, it scans downwards, noting every free symbol. A free symbol is one
that is used in an expression, yet has no value defined locally in that expression.
In other words, its value must come from the surrounding scope.
In this example, the adder function has a symbol x that is a part of its function definition.
This is clearly not a free variable. However, examining the inner lambda expression,
we can see that it uses y (which is not free) and x. The value of x is not defined
as part of the lambda expression, so it must be free.
The compiler, seeing this, notes that the inner lambda has a free variable `x`, and a parameter
`y`. Thus, the lambda has 1 free variable and 1 parameter. This means the closure object will have
a code pointer along with an array of length 1 forming the storage for the free variable(s).
The compiler compiles the body of the lambda such that every occurance of `x` is replaced
with code to get free variable #0 from the current closure. (`y` is, naturally, parameter #0).
Otherwise, no special handling is necessary.
The inner lambda has no other expressions creating further scopes, so the compiler
knows it has hit the deepest scope in the expression, and starts scanning outwards once again.
Scanning outwards, the compiler sees that there is a defined symbol x, and in the scope
of this definition, a lambda expression that uses a free symbol named x is used. The
compiler matches these, and compiles the lambda expression (as in, the value that the lambda
expression will evaluate to) such that it creates a closure object: a pair of code pointer
pointing to the already compiled body, and an array of length 1 containing the current
value of x.
This newly created value represents the closure. As you might notice, the current value
of x has been copied into the closure object. The closure is now returned, and the
scope of `adder` is destroyed. The closure object survives.
Note: in actuality, the outer `adder` function itself is also a closure. The inner
lambda actually has *two* free variables: `+` is also a symbol, and its value is not
defined in the body of the lambda. Since `adder` also doesn't define it, the free symbol
is propagated outwards, and adder also accesses it as a free variable. The compiler
(when propagating free symbols) eventually reaches the global environment, and
resolves these free symbols to their global definitions.
All global symbols are late-bound. Once the free symbol is propagated outwards to the global
definition, the compiler must notice this and insert an instruction to get the
value of a global symbol.
Thus, the following will raise an error at runtime:
```
(define (adder x)
(lambda (y) (+ x y)))
(set! '+ 5)
; + now equals 5.
(adder 5 5)
```
Since `5` is not a function, it cannot be called, and this will raise an error.
## Note on boxing
Closure conversion makes some situations a bit tricky.
```
(let ((x 10))
(let ((f (lambda () x))) ;; f captures x
(set! x 20) ;; we change local x
(f))) ;; does this return 10 or 20?
```
In this case, instead of x being copied directly into the closure, a
reference to its value is copied into the closure. This is usual in
most schemes and lisps.
In fact, you can even treat these as mutable state:
```
(define (make-counter)
(let ((count 0))
(lambda ()
(set! count (+ count 1))
count)))
```
So a closure can capture not just the value of a symbol, but also a
reference to it. This reference survives the end of the `make-counter`
function.
## Note on currying
Because this language is actually a curried variant of lisp/scheme, the
above function could also be written like this:
```scheme
(define (adder x y) (+ x y))
```
or, even like this:
```scheme
(define adder +)
```
... since the built-in `+` function is also already curried. In fact, the entire
language is curried. All function calls are (or behave as if they were) unary.
The function call syntax `(f x y)` is actually treated as `((f x) y)` by the
compiler.
## Note on syntax
I am using more or less regular Scheme syntax in this document. However, this is
potentially subject to change. I have not decided on what the official syntax
should be like. I am using Scheme syntax simply because I think it is fairly clean,
but some changes might make sense in the future as the semantics of this language
deviate greatly from Scheme's.
## Note on performance
This design document may raise concerns of performance. If everything above is
truly set in stone, then it seems obvious that there should be a performance
penalty.
As written, this design requires a basic addition like `(+ 1 2)` to allocate a
closure object after all. No matter how fast OCaml's minor heap may be
(and it is plenty fast, to be fair), that is not going to go well in a tight loop.
These are valid concerns, and I am currently leaving these problems to my future
self.
Optimizing multiple-argument functions is actually fairly straightforward (or
it looks easy, at least), however I want to first make sure the language
has consistent semantics. A slow language is better than no language, after all.
So I intend to add the facilities necessary for these optimizations into the
compiler at a later point.
## Global Definitions
Global definitions get a separate section because they're mostly straightforward.
Any symbol defined through a top-level `define` form is made globally available
after the definition form. More accurately, the symbol is present in the program
before the define is reached, however it will be bound to a dummy value until
it is accessed.
This behaviour is proposed for the purpose of allowing mutually
recursive definitions without issue, however please note that this is not yet certain,
because this design comes with the tradeoff that errors involving symbols accessed
before the point they are supposed to be defined can only be detected at runtime.
To illustrate the problems this could cause:
```
(define b (+ a 10))
(define a 5)
```
This is pretty clearly an error - yet the compiler cannot, as proposed, determine
this. In the future, further passes over the source code could be added to scan
for such issues, or a differentiator between top-level function and variable
definitions to prevent this.
Notably, this problem does not occur for function definitions. In fact, the following
is perfectly fine despite looking a bit similar:
```
(define (b) (+ a 10))
(define a 5)
```
Generally any symbol appearing in the body of a function, will only be compiled
to access that symbol. The symbol is only accessed once the function is called.
Thus, you can create mutually recursive functions at the top level with no issue.
The body of the definition is only executed once the `define` form is reached.
Thus, definitions with side effects will execute exactly in the order they
appear in the source.
-5
View File
@@ -1,7 +1,2 @@
(lang dune 3.7) (lang dune 3.7)
(using menhir 2.1) (using menhir 2.1)
(generate_opam_files true)
(package
(name ollisp)
(depends menhir))
Generated
-61
View File
@@ -1,61 +0,0 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1778869304,
"narHash": "sha256-30sZNZoA1cqF5JNO9fVX+wgiQYjB7HJqqJ4ztCDeBZE=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "d233902339c02a9c334e7e593de68855ad26c4cb",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}
-17
View File
@@ -1,17 +0,0 @@
{
description = "a lisp interpreter/compiler in ocaml";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
flake-utils.url = "github:numtide/flake-utils";
};
outputs = {self, nixpkgs, flake-utils}:
flake-utils.lib.eachDefaultSystem (system:
let
pkgs = nixpkgs.legacyPackages.${system};
in
{
packages.default = pkgs.callPackage ./default.nix {};
devShells.default = pkgs.callPackage ./shell.nix {};
});
}
+80
View File
@@ -0,0 +1,80 @@
type lisp_val =
| LInt of int
| LDouble of float
| LCons of lisp_val * lisp_val
| LNil
| LSymbol of string
| LString of string
(* a builtin function is expressed as a name and the ocaml function
that performs the operation. The function should take a list of arguments.
generally, builtin functions should handle their arguments directly,
and eval forms in the environment as necessary. *)
| LBuiltinFunction of string * (environment -> lisp_val -> lisp_val)
| LBuiltinSpecial of string * (environment -> lisp_val -> lisp_val)
(* a function is a name, captured environment, a parameter list, and function body. *)
| LFunction of string * environment * lisp_val * lisp_val
| LLambda of environment * lisp_val * lisp_val
(* a macro is exactly the same as a function, with the distinction
that it receives all of its arguments completely unevaluated
in a compiled lisp this would probably make more of a difference *)
| LMacro of string * environment * lisp_val * lisp_val
| LUnnamedMacro of environment * lisp_val * lisp_val
| LQuoted of lisp_val
and environment = (string, lisp_val) Hashtbl.t list
let env_set_local env s v =
match env with
| [] -> ()
| e1 :: _ -> Hashtbl.replace e1 s v
let rec env_update env s v =
match env with
| [] -> ()
| e1 :: erest ->
match Hashtbl.find_opt e1 s with
| None -> env_update erest s v
| Some _ -> Hashtbl.replace e1 s v
let env_new_lexical env =
let h = Hashtbl.create 16 in
h :: env
let rec env_root (env : environment) =
match env with
| [] -> raise (Invalid_argument "Empty environment passed to env_root!")
| e :: [] -> e
| _ :: t -> env_root t
let env_set_global env s v =
Hashtbl.replace (env_root env) s v
let env_copy env =
List.map Hashtbl.copy env
let rec dbg_print_one v =
let pf = Printf.sprintf in
match v with
| LInt x -> pf "<int: %d>" x
| LSymbol s -> pf "<symbol: '%s'>" s
| LString s -> pf "<string: '%s'>" s
| LNil -> pf "()"
| LCons (a, b) -> pf "(%s . %s)" (dbg_print_one a) (dbg_print_one b)
| LDouble d -> pf "<double: %f>" d
| LBuiltinSpecial (name, _)
| LBuiltinFunction (name, _) -> pf "<builtin: %s>" name
| LLambda (_, args, _) -> pf "<unnamed function, lambda-list: %s>"
(dbg_print_one args)
| LFunction (name, _, args, _) -> pf "<function: '%s' lambda-list: %s>"
name (dbg_print_one args)
| LUnnamedMacro (_, args, _) -> pf "<unnamed macro, lambda-list: %s>"
(dbg_print_one args)
| LMacro (name, _, args, _) -> pf "<macro '%s' lambda-list: %s>"
name (dbg_print_one args)
| LQuoted v -> pf "<quote: %s>" (dbg_print_one v)
(*| _ -> "<Something else>"*)
let dbg_print_all vs =
let pr v = Printf.printf "%s\n" (dbg_print_one v) in
List.iter pr vs
-117
View File
@@ -1,117 +0,0 @@
let traverse = Util.traverse
type literal =
| Int of int
| Double of float
| String of string
| Symbol of string
| Nil
| Cons of literal * literal
(* The Core Abstract Syntax Tree.
This tree does not use a GADT, as every type of expression
will be reduced to its simplest equivalent form before ending
up here. There is no reason to make this tree typed.
*)
type expression =
| Literal of literal
| Var of string
| Apply of expression * expression list
| Lambda of string list * string option * expression
| If of expression * expression * expression
| Set of string * expression
| Begin of expression list
type top_level =
| Define of string * expression
| Expr of expression
let rec pair_of_def : Syntactic_ast.def -> string * expression =
fun (s, e) -> (s, of_expr e)
and pair_of_binding (s, e) = (s, of_expr e)
and pair_of_clause (e1, e2) = (of_expr e1, of_expr e2)
and make_lambda (args, rest) body =
Lambda (args, rest, body)
(* desugars this...
(let ((x 5) (y 4)) (f x y))
... into this...
((lambda (x y) (f x y)) 5 4)
*)
and make_let bs body =
let bs = List.map pair_of_binding bs in
let args = List.map (fun (s, _) -> s) bs in
let es = List.map (fun (_, e) -> e) bs in
Apply (Lambda (args, None, body), es)
(* The Core AST does not feature a letrec node. Instead, we desugar letrecs further
into a let that binds each symbol to nil, then `set!`s them to their real value
before running the body.
*)
and make_letrec bs exprs =
let tmp_bs = List.map (fun (_, _) -> Literal Nil) bs in
let setters = List.fold_right (fun (s, e) acc -> (Set (s, e)) :: acc) bs [] in
let args = List.map (fun (s, _) -> s) bs in
let body = Begin ((List.rev setters) @ exprs) in
Apply (Lambda (args, None, body), tmp_bs)
(* We convert a body into a regular letrec form.
A body is defined as a series of definitions followed by a series
of expressions. The definitions behave exactly as a letrec, so
it makes sense to convert the body into a normal letrec.
*)
and of_body : Syntactic_ast.body -> expression = function
| ([], exprs) ->
let exprs = List.map of_expr exprs in
Begin exprs
| (defs, exprs) ->
let exprs = List.map of_expr exprs in
let defs = List.map pair_of_def defs in
make_letrec defs exprs
and of_ll : Syntactic_ast.lambda_list -> string list * string option = function
| (sl, rest) -> (sl, rest)
and of_literal : Syntactic_ast.literal -> literal = function
| LitInt x -> Int x
| LitDouble x -> Double x
| LitString x -> String x
| LitCons (a, b) -> Cons (of_literal a, of_literal b)
| LitNil -> Nil
| LitSymbol s -> Symbol s
and of_expr : Syntactic_ast.expr -> expression = function
| Literal l -> Literal (of_literal l)
| Var x -> Var x
| Lambda ((args, rest), b) -> Lambda (args, rest, of_body b)
| Let (bindings, b) -> make_let bindings (of_body b)
| LetRec (bindings, b) -> make_letrec (List.map pair_of_binding bindings) [(of_body b)]
| Cond (clauses) ->
List.fold_right
(fun (e1, e2) acc -> If (e1, e2, acc))
(List.map pair_of_clause clauses)
(Literal Nil)
| If (e1, e2, e3) ->
If (of_expr e1, of_expr e2, of_expr e3)
| Set (s, e) -> Set (s, of_expr e)
| Apply (f, es) -> Apply (of_expr f, List.map of_expr es)
and of_syntactic : Syntactic_ast.top_level -> top_level = function
| Def (s, e) -> Define (s, of_expr e)
| Exp (e) -> Expr (of_expr e)
| _ -> .
let of_sexpr x =
Result.bind (Syntactic_ast.make x)
(fun x -> Ok (of_syntactic x))
let of_src src =
let sexprs = Parser.parse_str src in
traverse of_sexpr sexprs
-3
View File
@@ -1,3 +0,0 @@
(library
(name compiler)
(libraries parser vm))
-205
View File
@@ -1,205 +0,0 @@
type literal = Core_ast.literal
type expression = Scope_analysis.expression
module SymbolTable = Scope_analysis.SymbolTable
type instr = Vm.Types.instr
type pre_global =
| Global of Vm.Types.value
| BackPatchClosure
type pre_instr =
| Instr of instr
| BackPatchMkClosure of int
| BackPatchJumpF
type program = {
instrs : pre_instr Dynarray.t;
constants : Vm.Types.value Dynarray.t;
globals : pre_global Dynarray.t;
sym_table : int SymbolTable.t;
(* This array holds the lambda bodies that we have to compiler later, and
the index we have to patch the address back into.
*)
backpatch : (int * expression) Queue.t;
backpatch_const_q : (int * int * expression) Queue.t;
}
let ( let* ) = Result.bind
let current_index p =
Dynarray.length p.instrs
let set_instr p i ins =
Dynarray.set p.instrs i (Instr ins)
let emit_mkclosure p i =
Ok (Dynarray.add_last p.instrs (BackPatchMkClosure i))
let emit_jumpf p =
Ok (Dynarray.add_last p.instrs BackPatchJumpF)
let emit_instr p i =
Ok (Dynarray.add_last p.instrs (Instr i))
let emit_constant p c =
Dynarray.add_last p.constants c;
emit_instr p (Constant ((Dynarray.length p.constants) - 1))
(* evaluating an expression ALWAYS has the effect of pushing exactly
one element to the stack. For top-level items, this element is
silently popped.
*)
let rec compile_one p = function
| Scope_analysis.Literal (Int x) -> emit_constant p (Vm.Types.Int x)
| Literal Nil -> emit_constant p (Vm.Types.Nil)
| Literal (Double x) -> emit_constant p (Vm.Types.Double x)
| Literal (String s) -> emit_constant p (Vm.Types.String s)
| Literal (Symbol s) -> emit_constant p (Vm.Types.Symbol s)
| Literal (Cons (a, b)) ->
let* _ = compile_one p (Literal a) in
let* _ = compile_one p (Literal b) in
emit_instr p (Vm.Types.MakeCons)
| Var (Scope_analysis.Local i) ->
emit_instr p (Vm.Types.LoadLocal i)
| Var (Global i) ->
emit_instr p (Vm.Types.LoadGlobal i)
| Set (Local i, expr) ->
let* _ = compile_one p expr in
emit_instr p (Vm.Types.StoreLocal i)
| Set (Global i, expr) ->
let* _ = compile_one p expr in
emit_instr p (Vm.Types.StoreGlobal i)
| Apply (f, args) ->
let* _ = compile_one p f in
let* _ = compile_all_no_pop p args in
emit_instr p (Vm.Types.Apply (List.length args))
| Lambda (arg_count, body) ->
let* _ = emit_mkclosure p arg_count in
Ok (Queue.push ((Dynarray.length p.instrs) - 1, body) p.backpatch)
| If (test, t, f) ->
(* *)
let* _ = compile_one p test in (* compile the expression to be tested *)
let jumpf_index = current_index p in
let* _ = emit_jumpf p in (* jump if false, to the false branch*)
let* _ = compile_one p t in (* true branch *)
let jump_index = current_index p in
let* _ = emit_jumpf p in (* jump unconditionally to the common point*)
let false_index = current_index p in
let* _ = compile_one p f in (* false branch *)
let reunite_index = current_index p in
let* _ = emit_instr p NOOP in
(* Now we can immediately backpatch the dummy instructions we put in place *)
set_instr p jumpf_index (JumpF false_index);
set_instr p jump_index (Jump reunite_index);
Ok ()
| Begin [] ->
Error "Cannot compile empty begin "
| Begin (e1 :: []) ->
compile_one p e1
| Begin (e1 :: e2 :: rest) ->
let* _ = compile_one p e1 in
let* _ = emit_instr p Vm.Types.Pop in
compile_one p (Begin (e2 :: rest))
| Native i ->
emit_constant p (Vm.Types.Native i)
and compile_all p exprs =
Util.traverse
(fun e ->
let* _ = compile_one p e in
emit_instr p Pop) exprs
and compile_all_no_pop p exprs =
Util.traverse
(fun e ->
let* _ = compile_one p e in Ok ()) exprs
(* Once we have compiled the top-level expressions, we must now compile
all of the lambdas we held off on. Some of these will hold more
lambdas - that should be fine, they'll just get added to the end
of the backpatch queue.
*)
let backpatch_one_instr p (i, b) =
match Dynarray.get p.instrs i with
| BackPatchMkClosure arg_count ->
Dynarray.set p.instrs i (Instr (MakeClosure (arg_count, current_index p)));
let* _ = compile_one p b in
emit_instr p End
| _ -> failwith "Can't backpatch anything other than a MakeClosure after compilation"
let rec backpatch_instrs p =
if Queue.is_empty p.backpatch then
Ok ()
else
(let* _ = backpatch_one_instr p (Queue.pop p.backpatch) in
backpatch_instrs p)
let backpatch_one_const p (i, arg_count, b) =
let instr_loc = Dynarray.length p.instrs in
let* _ = compile_one p b in
let* _ = emit_instr p End in
Ok (Dynarray.set p.globals i (Global (Vm.Types.Closure (arg_count, instr_loc, []))))
let rec backpatch_consts p =
if Queue.is_empty p.backpatch_const_q then
Ok ()
else
(let* _ = backpatch_one_const p (Queue.pop p.backpatch_const_q) in
backpatch_consts p)
let backpatch p =
let* () = backpatch_instrs p in
backpatch_consts p
let print_instr = function
| Instr i -> Vm.Types.print_one i
| BackPatchJumpF -> "BACKPATCH JUMPF\n"
| BackPatchMkClosure i -> "BACKPATCH CLOSURE \n" ^ (string_of_int i)
let print_instrs =
Array.mapi_inplace (fun i ins ->
print_endline (Printf.sprintf "%d: %s" i (print_instr ins)); ins)
let smooth_one_instr = function
| Instr i -> i
| _ -> failwith "backpatching process was not complete! (instrs)"
let smooth_instrs p =
Dynarray.to_array (Dynarray.map smooth_one_instr p.instrs)
let smooth_one_global = function
| Global c -> c
| _ -> failwith "backpatching process was not complete! (consts)"
let smooth_globals p =
Dynarray.to_array (Dynarray.map smooth_one_global p.globals)
let rec constantify = function
| Core_ast.Nil -> Vm.Types.Nil
| Core_ast.Int x -> Vm.Types.Int x
| Core_ast.String s -> Vm.Types.String s
| Core_ast.Double x -> Vm.Types.Double x
| Core_ast.Cons (a, b) -> Vm.Types.Cons (constantify a, constantify b)
| Core_ast.Symbol s -> Vm.Types.Symbol s
let mk_constants (tbl : (int * expression) SymbolTable.t) =
let constants = Dynarray.make ((SymbolTable.cardinal tbl) + 1) (Global Vm.Types.Nil) in
let to_backpatch = Queue.create () in
let () = SymbolTable.iter (fun _ (i, v) -> Dynarray.set constants i (match v with
| Scope_analysis.Lambda (a, b) -> Queue.add (i, a, b) to_backpatch; BackPatchClosure
| Scope_analysis.Literal l -> Global (constantify l)
| Native i -> Global (Vm.Types.Native i)
| _ -> Global Vm.Types.Nil)) tbl in
(constants, to_backpatch)
let compile (exprs : expression list) (tbl : (int * expression) SymbolTable.t) =
let (globals, backpatch_const_q) = mk_constants tbl in
let program = {
instrs=Dynarray.create ();
constants=Dynarray.create();
globals=globals;
sym_table=SymbolTable.map (fun (a, _) -> a) tbl;
backpatch=Queue.create ();
backpatch_const_q=backpatch_const_q;
} in
let* _ = compile_all program exprs in
let* _ = emit_instr program End in
let* _ = backpatch program in
let final_instrs = smooth_instrs program in
let final_globals = smooth_globals program in
let () = print_endline "constants:"; Array.iter (fun v -> print_endline(Vm.Types.print_value v)) final_globals in
Ok (Vm.make_vm final_instrs (Dynarray.to_array program.constants) final_globals) (*((SymbolTable.cardinal tbl) + 1))*)
let compile_src src =
let* (exprs, tbl) = Scope_analysis.of_src src in
compile exprs tbl
-8
View File
@@ -1,8 +0,0 @@
let counter = ref 0
let reset () = counter := 0
let gensym base =
incr counter;
Printf.sprintf "__generated_%s_%d" base !counter
-204
View File
@@ -1,204 +0,0 @@
module SymbolTable = Map.Make(String);;
let ( let* ) = Result.bind
let traverse = Util.traverse
(* literals are not modified. *)
type literal = Core_ast.literal
(* I made this a separate type, because this behaviour is common to both symbol
accesses, and to set! operations on symbols.
They can both either refer to a local, or refer to a global, and making a
separate type for this lets us statically eliminate a couple potential
runtime errors
*)
type variable =
| Local of int
| Global of int
(* Note:
all symbol accesses are either referring to a local binding or a global one,
and this is distinguished through the variable type above.
Lambda expressions are stripped of the symbol name of their single parameter.
This name is not needed at runtime, as all symbol accesses will be resolved
into an index into either the local scope linked list or the global symbol table.
Set is also split into its global and local versions, using the above variable type.
The rest aren't modified at all.
*)
type expression =
| Literal of literal
| Var of variable
| Apply of expression * expression list
| Lambda of int * expression
| If of expression * expression * expression
| Set of variable * expression
| Begin of expression list
| Native of int
(* Native is effectively a VM primitive. Emitted here for convenience. *)
(* IMPORTANT:
This is a predefined global table.
Some symbols in the standard library have special importance, so
they must have "special" values that exist before the program is
even compiled.
For example, the print function is always global. It must always
be global number 0. Most other primitives have similar assignments.
The runtime is not stable as it is now, so a program compiled with
a current version of the compiler may not remain functional with
later versions of the runtime. The source program should remain
good though.
*)
let default_global_table =
SymbolTable.of_list [
("PRINT", (0, Native 0));
("+", (1, Native 1));
("-", (2, Native 2));
("*", (3, Native 3));
("/", (4, Native 4));
("ABS", (5, Native 5));
("MOD", (6, Native 6));
("REM", (7, Native 7))
]
(* extract all defined global symbols, given the top-level expressions
and definitions of a program
The returned table maps symbol names to unique integers, representing
an index into a global array where the values of all global symbols will
be kept at runtime.
*)
let extract_globals (top : Core_ast.top_level list) =
let id_counter = (ref (SymbolTable.cardinal default_global_table)) in
let id () =
id_counter := !id_counter + 1; !id_counter in
let rec aux tbl = function
| [] -> tbl
| Core_ast.Define (sym, _) :: rest ->
aux (SymbolTable.add sym ((id ()), Literal Nil) tbl) rest
| Expr _ :: rest ->
aux tbl rest
in aux default_global_table top
(* The current lexical scope is simply a linked list of entries,
and each symbol access will be resolved as an access to an index
in this linked list. The symbol names are erased before runtime.
During this analysis we keep the lexical scope as a linked list of
symbols, and we find the index by traversing this linked list.
*)
let resolve_global tbl sym =
match SymbolTable.find_opt sym tbl with
| Some (x, _) -> Ok (Global x)
| None -> Error ("symbol " ^ sym ^ " is not defined!")
(* First we try to resolve it to a local symbol, then look it up in the
global table if we can't find it in the local environment
*)
let resolve_symbol tbl env sym =
let rec aux counter env_num = function
| [] -> resolve_global tbl sym
| x :: rest ->
match List.find_index (String.equal sym) x with
| Some i -> Ok (Local (counter + i))
| None -> aux (counter + (List.length (x :: rest))) (env_num + 1) rest
in aux 0 0 env
let resolve_var tbl env sym =
let* sym = resolve_symbol tbl env sym in
Ok (Var sym)
let resolve_set tbl env sym expr =
let* sym = resolve_symbol tbl env sym in
Ok (Set (sym, expr))
let extract_function = function
| Core_ast.Define (s, Core_ast.Lambda (args, rest, _)) -> Some (s, args, rest)
| _ -> None
let extract_functions exprs =
let fs = List.filter Option.is_some (List.map extract_function exprs) in
let fs = List.map Option.get fs in
List.fold_left (fun t (s, args, rest) -> SymbolTable.add s (args, rest) t) SymbolTable.empty fs
let rec analyze global_tbl =
let rec aux tbl current = function
| Core_ast.Literal s -> Ok (Literal s)
| Var sym -> resolve_var tbl current sym
| Set (sym, expr) ->
let* inner = analyze global_tbl tbl current expr in
resolve_set tbl current sym inner
| Lambda (args, rest, body) ->
let args = (match rest with
| Some s -> List.append args [s]
| None -> args) in
let* body = (aux global_tbl (args :: current) body) in
Ok (Lambda (List.length args, body))
| Apply (f, es) ->
let* f = aux tbl current f in
let* e = Util.traverse (aux tbl current) es in
Ok (Apply (f, e))
| If (test, pos, neg) ->
let* test = aux tbl current test in
let* pos = aux tbl current pos in
let* neg = aux tbl current neg in
Ok (If (test, pos, neg))
| Begin el ->
let* body = traverse (aux tbl current) el in
Ok (Begin body)
in aux
let is_constantish = function
| Literal _ -> true
| Lambda _ -> true
| Native _ -> true
| _ -> false
(* We need to do some more sophisticated analysis to detect cases where
a symbol is accessed before it is defined.
If a symbol is accessed in a lambda body, that is fine, since that computation
is delayed, but for top-level forms that are directly executed we must be strict.
This function is strict by default, until it encounters a lambda, at which
point it switches to resolving against all symbols.
global_tbl is a table that contains ALL defined symbols,
tbl is a table that contains symbols defined only until this point.
NOTE: because we currently convert all let expressions into lambdas, things like
this won't immediately be rejected by the compiler:
(let ((a 5))
b)
(define b 5)
I may consider adding special support for let forms, as this is pretty annoying.
*)
let convert program =
let global_tbl = ref (extract_globals program) in
let rec aux tbl = function
| [] -> Ok []
| (Core_ast.Expr e) :: rest ->
let* analysis = (analyze !global_tbl tbl [] e) in
let* rest = aux tbl rest in
Ok (analysis :: rest)
| (Define (s, e)) :: rest ->
let (id, _) = SymbolTable.find s !global_tbl in
let* analysis = analyze !global_tbl tbl [] e in
global_tbl := SymbolTable.remove s !global_tbl;
global_tbl := SymbolTable.add s (id, analysis) !global_tbl;
let tbl = SymbolTable.add s (SymbolTable.find s !global_tbl) tbl in
let* rest = aux tbl rest in
if is_constantish analysis then Ok (rest) else Ok (analysis :: rest)
in
let* program = (aux default_global_table program) in
Ok (program, !global_tbl)
let of_src src =
let* core = (Core_ast.of_src src) in
convert core
-338
View File
@@ -1,338 +0,0 @@
(* The entire point of this module is to transform a given sexpr tree into
an intermediary AST that directly represents the grammar.
*)
(* Literals *)
type literal =
| LitInt of int
| LitDouble of float
| LitString of string
| LitCons of literal * literal
| LitSymbol of string
| LitNil
type lambda_list = string list * string option
type expr =
| Literal of literal
| Lambda of lambda_list * body
| Let of (string * expr) list * body
| LetRec of (string * expr) list * body
| Cond of (expr * expr) list
| If of expr * expr * expr
| Set of string * expr
| Var of string
| Apply of expr * expr list
and def = string * expr
and body = def list * expr list
(* On the top-level we only allow definitions and expressions *)
type top_level =
| Def of def
| Exp of expr
(* we use result here to make things nicer *)
let ( let* ) = Result.bind
let traverse = Util.traverse
let map = List.map
let unwrap_exp = function
| Ok (Exp x) -> Ok x
| Error _ as e -> e
| _ -> Error "Definition found in Expression context"
let unwrap_def x =
let* x = x in
match x with
| Def d -> Ok d
| _ -> Error "Expression found in Definition context"
let exp x = Ok (Exp x)
let lit x = Ok (Exp (Literal x))
let def x = Ok (Def x)
open Parser.Ast
let sexpr_car = function
| LCons (a, _) -> Ok a
| _ -> Error "cannot take car of expression."
let sexpr_cdr = function
| LCons (_, d) -> Ok d
| _ -> Error "cannot take cdr of expression."
let sexpr_cadr cons =
let* cdr = sexpr_cdr cons in
sexpr_car cdr
let sexpr_cddr cons =
let* cdr = sexpr_cdr cons in
sexpr_cdr cdr
let sexpr_caddr cons =
let* cddr = sexpr_cddr cons in
sexpr_car cddr
let expect_sym = function
| LSymbol s -> Ok s
| _ -> Error "Expected symbol!"
(* We must now transform the s-expression tree into a proper, typed AST
First, we need some utilities for transforming proper lists and s-expr conses.
TODO: add diagnostics, e.g. what sexpr, specifically, couldn't be turned to a list?
generally more debugging is needed in this module.
*)
let rec list_of_sexpr = function
| LCons (i, next) ->
let* next = list_of_sexpr next in
Ok (i :: next)
| LNil -> Ok []
| _ -> Error "cannot transform sexpr into list, malformed sexpr!"
(* parse the argument list of a lambda form *)
let parse_lambda_list cons =
let rec aux acc = function
| LCons (LSymbol a, LSymbol b) ->
Ok (List.rev (a :: acc), Some b)
| LCons (LSymbol a, rest) ->
aux (a :: acc) rest
| LNil -> Ok (List.rev acc, None)
| _ -> Error "Improper lambda list."
in aux [] cons
(* Is the given lisp_ast node a definition? *)
let is_def = function
| LCons (LSymbol "define", _) -> true
| _ -> false
(* These five functions all depend on each other, which is why they are
defined in this let..and chain.
*)
let rec parse_body body =
(* This helper function separates the definitions and expressions from the body *)
let rec aux acc = function
| expr :: rest when is_def expr ->
aux (expr :: acc) rest
| rest ->
Ok (List.rev acc, rest)
in
let* body = list_of_sexpr body in
let* (defs, exprs) = aux [] body in
(* Once the expressions and definitions are separated we must parse them, then
unpack them from the top_level type.
*)
let* defs = traverse (Fun.compose unwrap_def transform) defs in
let* exprs = traverse (Fun.compose unwrap_exp transform) exprs in
Ok (defs, exprs)
and builtin_define cons =
let* second = sexpr_cadr cons in
match second with
| LSymbol sym ->
(* regular, symbol/variable definition *)
let* third = sexpr_caddr cons in
let* value = unwrap_exp (transform third) in
Ok (Def (sym, value))
| LCons (LSymbol sym, ll) ->
(* function definition, we treat this as a define + lambda *)
let* lambda_list = parse_lambda_list ll in
let* body = sexpr_cddr cons in
let* body = parse_body body in
Ok (Def (sym, Lambda (lambda_list, body)))
| _ -> Error "invalid definition!"
and builtin_lambda cons =
let* lambda_list = sexpr_cadr cons in
let* lambda_list = parse_lambda_list lambda_list in
let* body = sexpr_cddr cons in
let* body = parse_body body in
exp (Lambda (lambda_list, body))
and parse_bindings cons =
let parse_one cons =
let* sym = sexpr_car cons in
let* sym = expect_sym sym in
let* expr = sexpr_cadr cons in
let* expr = unwrap_exp (transform expr) in
Ok (sym, expr)
in
let* l = list_of_sexpr cons in
traverse parse_one l
and make_builtin_let f cons =
let* bindings = sexpr_cadr cons in
let* bindings = parse_bindings bindings in
let* body = sexpr_cddr cons in
let* body = parse_body body in
exp (f bindings body)
and parse_clauses cons =
let parse_one cons =
let* test = sexpr_car cons in
let* test = unwrap_exp (transform test) in
let* expr = sexpr_cadr cons in
let* expr = unwrap_exp (transform expr) in
Ok (test, expr)
in
let* l = list_of_sexpr cons in
traverse parse_one l
and builtin_cond cons =
let* clauses = sexpr_cdr cons in
let* clauses = parse_clauses clauses in
exp (Cond clauses)
and builtin_if cons =
let* cons = sexpr_cdr cons in
let* test = sexpr_car cons in
let* test = unwrap_exp (transform test) in
let* then_branch = sexpr_cadr cons in
let* then_branch = unwrap_exp (transform then_branch) in
let* else_branch = (match sexpr_caddr cons with
| Error _ -> Ok LNil
| Ok x -> Ok x) in
let* else_branch = unwrap_exp (transform else_branch) in
exp (If (test, then_branch, else_branch))
and builtin_set cons =
let* cons = sexpr_cdr cons in
let* sym = sexpr_car cons in
let* sym = (match sym with
| LSymbol s -> Ok s
| _ -> Error "cannot (set!) a non-symbol") in
let* expr = sexpr_cadr cons in
let* expr = unwrap_exp (transform expr) in
exp (Set (sym, expr))
and builtin_quote cons =
let* expr = sexpr_cadr cons in
let lit x = exp (Literal x) in
let rec aux = function
| LSymbol s -> (LitSymbol s)
| LInt x -> (LitInt x)
| LDouble x -> (LitDouble x)
| LString x -> (LitString x)
| LCons (a, b) -> (LitCons (aux a, aux b))
| LNil -> (LitNil) in
lit (aux expr)
and apply f args =
let* args = list_of_sexpr args in
let* args = traverse (fun x -> unwrap_exp (transform x)) args in
let* f = unwrap_exp (transform f) in
exp (Apply (f, args))
and builtin_symbol = function
| "define" -> builtin_define
| "lambda" -> builtin_lambda
| "let" -> (make_builtin_let (fun x y -> Let (x,y)))
| "letrec" -> (make_builtin_let (fun x y -> LetRec (x,y)))
| "cond" -> builtin_cond
| "if" -> builtin_if
| "set!" -> builtin_set
| "quote" -> builtin_quote
| _ -> (function
| LCons (f, args) -> apply f args
| _ -> Error "Invalid function application!")
and transform : lisp_ast -> (top_level, string) result = function
| LInt x -> lit (LitInt x)
| LDouble x -> lit (LitDouble x)
| LString x -> lit (LitString x)
(* NOTE: not all symbols are automatically Variable expressions,
Some must be further parsed (such as inside a definition)
*)
| LSymbol x -> exp (Var x)
| LNil -> lit (LitNil)
| LCons (LSymbol s, _) as cons -> (builtin_symbol s) cons
| LCons (f, args) -> apply f args
let make (expr : Parser.Ast.lisp_ast) : (top_level, string) result =
transform expr
let of_src s =
Util.traverse make (Parser.parse_str s)
(* Printing, for debug purposes *)
let pf = Printf.sprintf
let rec print_lambda_list = function
| (strs, None) -> ("(" ^ (String.concat " " strs) ^ ")")
| (strs, Some x) -> ("(" ^ (String.concat " " strs) ^ " . " ^ x ^ ")")
and print_let_binding x =
let (s, expr) = x in
pf "(%s %s)" s (print_expr expr)
and print_bindings l =
("(" ^ (String.concat "\n" (map print_let_binding l)) ^ ")")
and print_clause x =
let (test, expr) = x in
pf "(%s %s)" (print_expr test) (print_expr expr)
and print_clauses l =
(String.concat "\n" (map print_clause l))
and print_def = function
| (s, expr) ->
pf "(define %s
%s)" s (print_expr expr)
and print_defs l =
String.concat "\n" (map print_def l)
and print_literal = function
| LitDouble x -> pf "%f" x
| LitInt x -> pf "%d" x
| LitString x -> pf "\"%s\"" x
| LitNil -> pf "nil"
| LitCons (a, b) -> pf "(%s . %s)" (print_literal a) (print_literal b)
| LitSymbol s -> pf "'%s" s
and print_expr = function
| Literal l -> print_literal l
| Lambda (ll, (defs, exprs)) ->
pf "(lambda %s
; DEFINITIONS
%s
; BODY
%s)"
(print_lambda_list ll)
(String.concat "\n" (map print_def defs))
(String.concat "\n" (map print_expr exprs))
| Let (binds, (defs, exprs)) ->
pf "(let
; BINDINGS
%s
; DEFINITIONS
%s
; EXPRESSIONS
%s)"
(print_bindings binds)
(print_defs defs)
(print_exprs exprs)
| LetRec (binds, (defs, exprs)) ->
pf "(letrec
; BINDINGS
%s
; BODY
%s
; EXPRESSIONS
%s)"
(print_bindings binds)
(print_defs defs)
(print_exprs exprs)
| Cond (clauses) ->
pf "(cond
%s)"
(print_clauses clauses)
| Var s -> s
| If (e1, e2, e3) ->
pf "(if %s %s %s)" (print_expr e1) (print_expr e2) (print_expr e3)
| Set (s, expr) ->
pf "(set! %s %s)" s (print_expr expr)
| Apply (f, exprs) ->
pf "(apply %s %s)"
(print_expr f)
("(" ^ (String.concat " " (map print_expr exprs)) ^ ")")
(* | _ -> "WHATEVER" *)
and print_exprs l =
String.concat "\n" (map print_expr l)
let print = function
| Def x -> print_def x
| Exp x -> print_expr x
-9
View File
@@ -1,9 +0,0 @@
let ( let* ) = Result.bind
let traverse f l =
let rec aux acc = function
| x :: xs ->
let* result = f x in
aux (result :: acc) xs
| [] -> Ok (List.rev acc) in
aux [] l
+7
View File
@@ -0,0 +1,7 @@
(library
(name lisp))
(include_subdirs unqualified)
(menhir (modules parser))
(ocamllex lexer)
+169
View File
@@ -0,0 +1,169 @@
open Ast;;
open InterpreterStdlib;;
let default_env: environment = [Hashtbl.create 1024];;
let add_builtin s f =
env_set_global default_env s (LBuiltinFunction (s, f))
let add_special s f =
env_set_global default_env s (LBuiltinSpecial (s, f))
let make_env () = [Hashtbl.copy (List.hd default_env)]
(* the type annotations are unnecessary, but help constrain us from a
potentially more general function here *)
let rec eval_sym (env: environment) (s: string) =
match env with
| [] -> raise (Invalid_argument (Printf.sprintf "eval_sym: symbol %s has no value in current scope" s))
| e :: rest ->
match Hashtbl.find_opt e s with
| None -> eval_sym rest s
| Some v -> v
let rec eval_one env = function
| LSymbol s -> eval_sym env s
| LCons (func, args) -> eval_call env (eval_one env func) args
| LQuoted v -> v
| v -> v (* All other forms are self-evaluating *)
(* Evaluate a list of values, without evaluating the resulting
function or macro call. Since macros and functions inherently
look similar, they share a lot of code, which is extracted here *)
and eval_list env l =
match l with
| LNil -> LNil
| LCons (a, b) -> LCons (eval_one env a, eval_list env b)
| _ -> raise (Invalid_argument "eval_list: cannot process non-list")
and eval_body env body =
match body with
| LNil -> LNil
| LCons (form, LNil) -> eval_one env form
| LCons (form, next) -> ignore (eval_one env form); eval_body env next
| _ -> LNil
and err s = raise (Invalid_argument s)
and bind_args env = function
| LNil -> (function
| LNil -> ()
| _ -> err "cannot bind arguments")
| LSymbol s ->
(function
| v -> env_set_local env s v; ())
| LCons (LSymbol hl, tl) -> (function
| LCons (ha, ta) ->
env_set_local env hl ha;
bind_args env tl ta;
| _ -> err "cannot bind arguments")
| _ -> fun _ -> err "bind_args"
and eval_apply args = function
| LLambda (e, l, b)
| LFunction (_, e, l, b) ->
let lexical_env = env_new_lexical e in
bind_args lexical_env l args;
eval_body lexical_env b
| LUnnamedMacro (e, l, b)
| LMacro (_, e, l, b) ->
let lexical_env = env_new_lexical e in
bind_args lexical_env l args;
eval_body lexical_env b
| v ->
err ("Non-macro non-function value passed to eval_apply "
^ dbg_print_one v); LNil
and eval_call env func args =
match func with
| LBuiltinSpecial (_, f) -> f env args
| LBuiltinFunction (_, f) -> f env (eval_list env args)
(* The function calls don't happen in the calling environment,
so it makes no sense to pass env to a call. *)
| LLambda _
| LFunction _ -> eval_apply (eval_list env args) func
(* Macros are the same, they just return code that *will* be evaluated
in the calling environment *)
| LUnnamedMacro _
| LMacro _ -> eval_one env (eval_apply args func)
| v -> raise (Invalid_argument
(Printf.sprintf "eval_apply: cannot call non-function object %s" (dbg_print_one v)))
and (* This only creates a *local* binding, contained to the body given. *)
bind_local env = function
| LCons (LSymbol s, LCons (v, body)) ->
let e = env_new_lexical env in
env_set_local e s v;
eval_body e body
| _ -> invalid_arg "invalid argument to bind-local"
(* special form that creates a global binding *)
and lisp_define env = function
| LCons (LSymbol s, LCons (v, LNil)) ->
let evaluated = eval_one env v in
env_set_global env s evaluated;
evaluated
| _ -> invalid_arg "invalid args to def"
and lisp_if env = function
| LCons (cond, LCons (if_true, LNil)) ->
(match eval_one env cond with
| LNil -> LNil
| _ -> eval_one env if_true)
| LCons (cond, LCons (if_true, LCons (if_false, LNil))) ->
(match eval_one env cond with
| LNil -> eval_one env if_false
| _ -> eval_one env if_true)
| _ -> invalid_arg "invalid argument list passed to if!"
let eval_all env vs =
let ev v = eval_one env v in
List.map ev vs
let () = add_builtin "+" add
let () = add_builtin "-" sub
let () = add_builtin "car" car
let () = add_builtin "cdr" cdr
let () = add_builtin "cons" cons
let () = add_special "def" lisp_define
let () = add_builtin "set" lisp_set
let () = add_builtin "list" lisp_list
let () = add_special "fn" lambda
let () = add_special "fn-macro" lambda_macro
let () = add_special "let-one" bind_local
let () = add_special "quote" (fun _ -> function
| LCons (x, LNil) -> x
| _ -> invalid_arg "hmm")
let () = add_special "if" lisp_if
let () = add_builtin "nil?" lisp_not
let () = add_builtin "not" lisp_not (* Yes, these are the same thing *)
(*let () = add_builtin "print" lisp_prin *)
(* I know this looks insane. please trust me.
Idea: maybe put this in a file instead of putting
literally the entire standard library in a constant string
*)
let _ = eval_all default_env (Read.parse_str
"
(def defn
(fn-macro (name lm . body)
(list 'def name (cons 'fn (cons lm body)))))
(def defmacro
(fn-macro (name lm . body)
(list 'def name (cons 'fn-macro (cons lm body)))))
(defmacro setq (sym val)
(list 'set (list 'quote sym) val))
(defmacro letfn (sym fun . body)
(cons 'let-one (cons sym (cons '() (cons (list 'setq sym fun) body)))))
(defn filter (f l)
(letfn helper
(fn (l acc)
(if (nil? l) acc (helper (cdr l) (if (f (car l)) (cons (car l) acc) acc))))
(helper l '())))
")
+69
View File
@@ -0,0 +1,69 @@
open Ast;;
let add _ vs =
let rec aux accum = function
| LCons (a, b) ->
(match accum, a with
| LInt x , LInt y -> aux (LInt (x + y)) b
| LDouble x, LInt y -> aux (LDouble (x +. (float_of_int y))) b
| LInt x, LDouble y -> aux (LDouble ((float_of_int x) +. y)) b
| LDouble x, LDouble y -> aux (LDouble (x +. y)) b
| _ -> invalid_arg "invalid args to +")
| LNil -> accum
| _ -> invalid_arg "invalid args to +"
in aux (LInt 0) vs
let sub _ vs =
let rec aux accum = function
| LNil -> accum
| LCons (a, b) -> (match accum, a, b with
| LNil, LDouble x, LNil -> LDouble (-. x)
| LNil, LInt x, LNil -> LInt (-x)
| LNil, LDouble _, _
| LNil, LInt _, _ -> aux a b
| LInt x, LInt y, _ -> aux (LInt (x - y)) b
| LInt x, LDouble y, _ -> aux (LDouble ((float_of_int x) -. y)) b
| LDouble x, LDouble y, _ -> aux (LDouble (x -. y)) b
| LDouble x, LInt y, _ -> aux (LDouble (x -. (float_of_int y))) b
| _ -> invalid_arg "invalid argument to -")
| _ -> invalid_arg "argument to -"
in aux LNil vs
let car _ vs =
match vs with
| LCons (LCons (a, _), LNil) -> a
| _ -> raise (Invalid_argument "car: invalid argument")
let cdr _ vs =
match vs with
| LCons (LCons (_, b), LNil) -> b
| _ -> raise (Invalid_argument "cdr: invalid argument")
let cons _ vs =
match vs with
| LCons (a, LCons (b, LNil)) -> LCons (a, b)
| _ -> invalid_arg "invalid args to cons!"
let lisp_list _ vs = vs
(* builtin function that updates an existing binding *)
let lisp_set env = function
| LCons (LSymbol s, LCons (v, LNil)) ->
env_update env s v;
v
| _ -> invalid_arg "invalid args to set"
let lambda env = function
| LCons (l, body) ->
LLambda (env, l, body)
| _ -> raise (Invalid_argument "invalid args to lambda!")
let lambda_macro env = function
| LCons (l, body) -> LUnnamedMacro (env, l, body)
| _ -> invalid_arg "invalid args to lambda-macro"
let lisp_not _ = function
| LCons (LNil, LNil) -> LSymbol "t"
| _ -> LNil
+2 -2
View File
@@ -1,6 +1,6 @@
{ {
open Lexing open Lexing
open Parse open Parser
exception SyntaxError of string exception SyntaxError of string
let strip_quotes s = String.sub s 1 (String.length s - 2);; let strip_quotes s = String.sub s 1 (String.length s - 2);;
@@ -14,7 +14,7 @@ let double = digit* '.' digit+ | digit+ '.' digit*
let white = [' ' '\t']+ let white = [' ' '\t']+
let newline = '\r' | '\n' | "\r\n" let newline = '\r' | '\n' | "\r\n"
let sym_char = ['a'-'z' 'A'-'Z' '!' '\\' '+' '-' '*' '/' '_' '?' '=' '>' '<'] let sym_char = ['a'-'z' 'A'-'Z' '!' '\\' '+' '-' '*' '/' '_' '?']
let sym = sym_char sym_char* let sym = sym_char sym_char*
let str = '"' [^'"']* '"' let str = '"' [^'"']* '"'
+3 -3
View File
@@ -12,7 +12,7 @@
%token DOT %token DOT
%token EOF %token EOF
%start <lisp_ast option> prog %start <Ast.lisp_val option> prog
%% %%
prog: prog:
@@ -23,8 +23,8 @@ prog:
expr: expr:
| i = INT { LInt i } | i = INT { LInt i }
| d = DOUBLE {LDouble d} | d = DOUBLE {LDouble d}
| s = SYM { LSymbol (String.uppercase_ascii s) } | s = SYM { LSymbol s }
| s = STR { LString s} | s = STR { LString (String.uppercase_ascii s) }
| LPAREN; l = lisp_list_rest { l } | LPAREN; l = lisp_list_rest { l }
| QUOTE; e = expr { LCons (LSymbol "quote", LCons (e, LNil)) } | QUOTE; e = expr { LCons (LSymbol "quote", LCons (e, LNil)) }
; ;
-10
View File
@@ -1,10 +0,0 @@
type lisp_ast =
| LInt of int
| LDouble of float
| LSymbol of string
| LString of string
| LNil
| LCons of lisp_ast * lisp_ast
-7
View File
@@ -1,7 +0,0 @@
(library
(name parser)
(modules parser lex parse ast)
(package ollisp))
(menhir (modules parse))
(ocamllex lex)
+1 -4
View File
@@ -1,4 +1,4 @@
let parse_one lb = Parse.prog (Lex.read) lb let parse_one lb = Parser.prog (Lexer.read) lb
let parse lb = let parse lb =
let rec helper () = let rec helper () =
@@ -11,6 +11,3 @@ let parse lb =
let parse_str s = let parse_str s =
parse (Lexing.from_string s) parse (Lexing.from_string s)
module Ast = Ast
module Parse = Parse
-3
View File
@@ -1,3 +0,0 @@
(library
(name vm)
)
-103
View File
@@ -1,103 +0,0 @@
(* This file implements native functions of the VM runtime.
Stuff like printing to the screen, file I/O etc will be implemented
here.
*)
open Types
type numeric_val =
| NInt of int
| NDouble of float
let to_numeric = function
| Int x -> NInt x
| Double x -> NDouble x
| v -> failwith ((print_value v) ^ " is not a numeric value")
let of_numeric = function
| NInt x -> Int x
| NDouble x -> Double x
let float_of_numeric = function
| NInt x -> float_of_int x
| NDouble x -> x
let numeric_generic fi fd = function
| NInt x -> (function
| NInt y -> NInt (fi x y)
| NDouble y -> NDouble (fd (float_of_int x) y))
| NDouble x -> (function
| NInt y -> NDouble (fd x (float_of_int y))
| NDouble y -> NDouble (fd x y))
let numeric_add = numeric_generic (+) (+.)
let numeric_sub = numeric_generic (-) (-.)
let numeric_mul = numeric_generic ( * ) ( *. )
let numeric_div x y =
NDouble ((float_of_numeric x) /. (float_of_numeric y))
module type Num = sig
type t
val add : t -> t -> t
val zero : t
val rem : t -> t -> t
end
let aux_mod (type a) (module M : Num with type t = a) (x:a) (y:a) =
let z = M.rem x y in
if z < M.zero then M.add z y else z
let numeric_mod = numeric_generic (aux_mod (module Int)) (aux_mod (module Float))
let numeric_rem = numeric_generic (Int.rem) (Float.rem)
let builtin_print (v : Types.value ref list) =
List.iter (fun r -> print_endline (print_value !r)) v;
Types.Nil
let builtin_add (vs : Types.value ref list) =
of_numeric (List.fold_left numeric_add (NInt 0) (List.map (fun r -> to_numeric !r) vs))
let builtin_sub (vs : Types.value ref list) =
match vs with
| f :: [] -> of_numeric (match (to_numeric !f) with
| NInt x -> NInt (Int.neg x)
| NDouble x -> NDouble (Float.neg x))
| f :: rest -> of_numeric (List.fold_left numeric_sub (to_numeric !f) (List.map (fun r -> to_numeric !r) rest))
| [] -> failwith "invalid number of arguments for subtraction: 0"
let builtin_mul vs =
of_numeric (List.fold_left numeric_mul (NInt 1) (List.map (fun r -> to_numeric !r) vs))
let builtin_div vs =
match vs with
| f :: [] -> of_numeric (numeric_div (NDouble 1.0) (to_numeric !f))
| f :: rest -> of_numeric (List.fold_left numeric_div (to_numeric !f) (List.map (fun r -> to_numeric !r) rest))
| [] -> failwith "invalid number of arguments for division: 0"
let make_single_func s f = function
| first :: [] -> f first
| v -> failwith ("invalid number of arguments for " ^s ^ ": " ^ (string_of_int (List.length v)))
let make_two_func s f = function
| first :: second :: [] -> f first second
| v -> failwith ("invalid number of arguments for " ^ s ^ ": " ^ (string_of_int (List.length v)))
let builtin_abs = make_single_func "ABS" (fun f -> of_numeric (match to_numeric !f with
| NInt x -> NInt (Int.abs x)
| NDouble x -> NDouble (Float.abs x)))
let builtin_mod =
make_two_func "MOD" (fun x y -> of_numeric (numeric_mod (to_numeric !x) (to_numeric !y)))
let builtin_rem =
make_two_func "REM" (fun x y -> of_numeric (numeric_rem (to_numeric !x) (to_numeric !y)))
let table = [|
builtin_print;
builtin_add;
builtin_sub;
builtin_mul;
builtin_div;
builtin_abs;
builtin_mod;
builtin_rem
|]
-74
View File
@@ -1,74 +0,0 @@
type value =
| Int of int
| Double of float
| String of string
| Nil
| Cons of value * value
| Symbol of string
| Closure of int * int * value ref list
| Native of int (* This is basically a syscall, each ID represents a primitive operation
that should have a well-defined effect. These will be further detailed
in the language documentation
*)
type instr =
| Constant of int
| LoadLocal of int
| LoadGlobal of int
| StoreLocal of int
| StoreGlobal of int
| MakeCons
| Pop (* discards top of stack *)
| Apply of int (* arg count *)
| MakeClosure of int * int (* arg count, code pointer *)
| Jump of int
| JumpF of int (* jump if false. *)
| End
| NOOP
type vm_state = {
mutable i : int;
instrs : instr array;
globals : value array;
constants : value array;
mutable env : value ref list;
mutable stack : value list;
mutable call_stack : (int * (value ref list)) list;
}
let p = Printf.sprintf
let rec print_value = function
| Int x -> p "%d" x
| Double x -> p "%f" x
| String x -> p "\"%s\"" x
| Nil -> p "'()"
| Cons (a, b) -> p "(%s . %s)" (print_value a) (print_value b)
| Symbol x -> p "'%s" x
| Closure (a, i, _) -> p "<closure of %d args at %d>" a i
| Native i -> p "<native %d>" i
let print_one = function
| Constant i -> p "CONSTANT %d\n" i
| LoadLocal i -> p "LOCAL %d\n" i
| LoadGlobal i -> p "GLOBAL %d\n" i
| StoreLocal i -> p "STORE_LOCAL %d\n" i
| StoreGlobal i -> p "STORE_GLOBAL %d\n" i
| MakeCons -> p "CONS\n"
| Pop -> p "POP\n"
| Apply i -> p "APPLY %d\n" i
| MakeClosure (a, i) -> p "MKCLOSURE %d, %d\n" a i
| Jump i -> p "JMP %d\n" i
| JumpF i -> p "JMPF %d\n" i
| End -> p "END\n"
| NOOP -> p "NOOP\n"
let print_instrs instrs =
Array.mapi_inplace
(fun i ins ->
print_string (p "%d: %s" i (print_one ins));
ins)
instrs
-97
View File
@@ -1,97 +0,0 @@
module Types = Types
open Types
let do_local state i f =
match List.nth_opt state.env i with
| None -> failwith "Invalid index for local access"
| Some x -> f x
let load_local state i =
do_local state i (!)
let set_local state i v =
do_local state i (fun r -> r := v)
let pop_one state =
match state.stack with
| v :: rest -> state.stack <- rest; v
| [] -> failwith ("VM error: cannot pop from empty stack! " )
let pop_args state count =
let rec aux acc i =
if i <= 0 then acc
else aux ((ref (pop_one state)) :: acc) (i - 1)
in aux [] count
let peek_one state =
match state.stack with
| v :: _ -> v
| [] -> failwith ("VM error: cannot peek on empty stack! " )
let push state v =
state.stack <- (v :: state.stack)
let trace state =
let stack () = List.fold_left (fun acc x -> acc ^ " " ^ (Types.print_value x)) "" state.stack in
let env () = List.fold_left (fun acc x -> acc ^ " " ^ (Types.print_value !x)) "" state.env in
Printf.printf "%d: \n\tstack: [%s ]\n\tenv:[%s]\n" state.i (stack ()) (env ())
let rec do_apply state arg_count =
let cur_env = state.env in
let cur_i = state.i in
let args = pop_args state arg_count in
let f = pop_one state in
match f with
| Closure (a, _, _) when a != arg_count -> failwith "Wrong argument count to function"
| Closure (_, x, e) ->
state.call_stack <- (cur_i, cur_env) :: state.call_stack;
state.i <- x;
state.env <- List.append args e;
interpret state
| Native x ->
push state (Native.table.(x) args);
interpret state
| _ -> failwith "Cannot apply non-closure object"
and interpret state =
(*trace state; (*For debug use only*)*)
let i = state.i in
state.i <- i + 1;
(match state.instrs.(i) with
| Constant x -> push state state.constants.(x) ; interpret state
| LoadLocal x -> push state (load_local state x) ; interpret state
| LoadGlobal x -> push state state.globals.(x) ; interpret state
| StoreLocal x -> set_local state x (peek_one state) ; interpret state
| StoreGlobal x -> Array.set state.globals x (peek_one state) ; interpret state
| MakeCons ->
let cdr = pop_one state in
let car = pop_one state in
push state (Cons (car, cdr))
| Pop -> ignore (pop_one state) ; interpret state
| Apply a -> do_apply state a
| MakeClosure (args, x) -> push state (Closure (args, x, state.env)); interpret state
| Jump target -> state.i <- target ; interpret state
| JumpF target ->
(match (pop_one state) with
| Nil -> state.i <- target
| _ -> ()); interpret state
| End ->
(match state.call_stack with
| [] ->
print_endline "\nPROGRAM HAS SUCCESSFULLY TERMINATED"
| (old_i, old_env) :: rest ->
state.call_stack <- rest;
state.env <- old_env;
state.i <- old_i;
interpret state)
| NOOP -> interpret state)
let make_vm instrs constants globals =
(*let globals = Array.init global_count (fun x -> if x < (Array.length Native.table) then Native x else Nil) in*)
{
i = 0;
instrs = instrs;
globals = globals;
constants = constants;
env = [];
stack = [];
call_stack = [];
}
View File
-21
View File
@@ -1,21 +0,0 @@
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
depends: [
"dune" {>= "3.7"}
"menhir"
"odoc" {with-doc}
]
build: [
["dune" "subst"] {dev}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"@install"
"@runtest" {with-test}
"@doc" {with-doc}
]
]
-7
View File
@@ -1,7 +0,0 @@
{pkgs ? import <nixpkgs> {}, ...}:
# I use emacs and merlin while developing
pkgs.mkShell {
inputsFrom = [(import ./default.nix {pkgs=pkgs;})];
packages = [pkgs.ocamlPackages.merlin];
}