Compare commits

..

4 Commits

Author SHA1 Message Date
haxala1r 2b02740e68 compiler: add the initial draft for compiling into byte code with backpatching
ci/woodpecker/push/debian Pipeline was successful
ci/woodpecker/push/nix Pipeline was successful
ci/woodpecker/push/fedora Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful
2026-04-25 20:46:28 +03:00
haxala1r 190ec94e14 compiler: make space for compiling into the VM bytecode 2026-04-25 20:44:37 +03:00
haxala1r fe3ad80826 vm: added noop instruction, various other improvements 2026-04-25 20:43:55 +03:00
haxala1r 2822774931 vm: modified the vm to include native procedures, and changed the order of some parameters 2026-04-25 00:08:54 +03:00
6 changed files with 209 additions and 39 deletions
+1 -1
View File
@@ -1,3 +1,3 @@
(library (library
(name compiler) (name compiler)
(libraries parser)) (libraries parser vm))
+119
View File
@@ -0,0 +1,119 @@
type literal = Core_ast.literal
type expression = Scope_analysis.expression
module SymbolTable = Scope_analysis.SymbolTable
type instr = Vm.Types.instr
type pre_instr =
| Instr of instr
| BackPatchMkClosure
| BackPatchJumpF
type program = {
instrs : pre_instr Dynarray.t;
constants : Vm.Types.value Dynarray.t;
sym_table : int SymbolTable.t;
(* This array holds the lambda bodies that we have to compiler later, and
the index we have to patch the address back into.
*)
backpatch : (int * expression) Queue.t;
}
let ( let* ) = Result.bind
let current_index p =
Dynarray.length p.instrs
let set_instr p i ins =
Dynarray.set p.instrs i (Instr ins)
let emit_mkclosure p =
Ok (Dynarray.add_last p.instrs BackPatchMkClosure)
let emit_jumpf p =
Ok (Dynarray.add_last p.instrs BackPatchJumpF)
let emit_instr p i =
Ok (Dynarray.add_last p.instrs (Instr i))
let emit_constant p c =
Dynarray.add_last p.constants c;
emit_instr p (Constant ((Dynarray.length p.constants) - 1))
(* evaluating an expression ALWAYS has the effect of pushing exactly
one element to the stack. For top-level items, this element is
silently popped.
*)
let rec compile_one p = function
| Scope_analysis.Literal (Int x) -> emit_constant p (Vm.Types.Int x)
| Literal Nil -> emit_constant p (Vm.Types.Nil)
| Literal (Double x) -> emit_constant p (Vm.Types.Double x)
| Literal (String s) -> emit_constant p (Vm.Types.String s)
| Literal (Cons (a, b)) ->
let* _ = compile_one p (Literal a) in
let* _ = compile_one p (Literal b) in
emit_instr p (Vm.Types.MakeCons)
| Var (Scope_analysis.Local i) ->
emit_instr p (Vm.Types.LoadLocal i)
| Var (Global i) ->
emit_instr p (Vm.Types.LoadGlobal i)
| Set (Local i, expr) ->
let* _ = compile_one p expr in
emit_instr p (Vm.Types.StoreLocal i)
| Set (Global i, expr) ->
let* _ = compile_one p expr in
emit_instr p (Vm.Types.StoreGlobal i)
| Apply (f, arg) ->
let* _ = compile_one p f in
let* _ = compile_one p arg in
emit_instr p Vm.Types.Apply
| Lambda body ->
let* _ = emit_mkclosure p in
Ok (Queue.push ((Dynarray.length p.instrs) - 1, body) p.backpatch)
| If (test, t, f) ->
(* *)
let* _ = compile_one p test in (* compile the expression to be tested *)
let jumpf_index = current_index p in
let* _ = emit_jumpf p in (* jump if false, to the false branch*)
let* _ = compile_one p t in (* true branch *)
let jump_index = current_index p in
let* _ = emit_jumpf p in (* jump unconditionally to the common point*)
let false_index = current_index p in
let* _ = compile_one p f in (* false branch *)
let reunite_index = current_index p in
let* _ = emit_instr p NOOP in
(* Now we can immediately backpatch the dummy instructions we put in place *)
set_instr p jumpf_index (JumpF false_index);
set_instr p jump_index (Jump reunite_index);
Ok ()
| Begin [] ->
Error "Cannot compile empty begin "
| Begin (e1 :: []) ->
compile_one p e1
| Begin (e1 :: e2 :: rest) ->
let* _ = compile_one p e1 in
compile_one p (Begin (e2 :: rest))
and compile_all p exprs =
Util.traverse (compile_one p) exprs
(* Once we have compiled the top-level expressions, we must now compile
all of the lambdas we held off on. Some of these will hold more
lambdas - that should be fine, they'll just get added to the end
of the backpatch queue.
*)
let backpatch_one p (i, b) =
Dynarray.set p.instrs i (Instr (MakeClosure (current_index p)));
let* _ = compile_one p b in
emit_instr p End
let backpatch p =
if Queue.is_empty p.backpatch then
Ok ()
else backpatch_one p (Queue.pop p.backpatch)
let compile (exprs : expression list) (tbl : int SymbolTable.t) =
let program = {
instrs=Dynarray.create ();
constants=Dynarray.create ();
sym_table=tbl;
backpatch=Queue.create ();
} in
let* _ = compile_all program exprs in
backpatch program
+19 -1
View File
@@ -39,6 +39,24 @@ type expression =
| Set of variable * expression | Set of variable * expression
| Begin of expression list | Begin of expression list
(* IMPORTANT:
This is a predefined global table.
Some symbols in the standard library have special importance, so
they must have "special" values that exist before the program is
even compiled.
For example, the print function is always global. It must always
be global number 0. Most other primitives have similar assignments.
The runtime is not stable as it is now, so a program compiled with
a current version of the compiler may not remain functional with
later versions of the runtime. The source program should remain
good though.
*)
let default_global_table =
SymbolTable.of_list [
("print", 0);
("add", 1)
]
(* extract all defined global symbols, given the top-level expressions (* extract all defined global symbols, given the top-level expressions
and definitions of a program and definitions of a program
@@ -57,7 +75,7 @@ let extract_globals (top : Core_ast.top_level list) =
aux (SymbolTable.add sym (id ()) tbl) rest aux (SymbolTable.add sym (id ()) tbl) rest
| Expr _ :: rest -> | Expr _ :: rest ->
aux tbl rest aux tbl rest
in aux SymbolTable.empty top in aux default_global_table top
(* The current lexical scope is simply a linked list of entries, (* The current lexical scope is simply a linked list of entries,
and each symbol access will be resolved as an access to an index and each symbol access will be resolved as an access to an index
+19
View File
@@ -0,0 +1,19 @@
(* This file implements native functions of the VM runtime.
Stuff like printing to the screen, file I/O etc will be implemented
here.
*)
open Types
let builtin_print (v : value) =
ignore v;
Nil
let table = [|
builtin_print
|]
+39
View File
@@ -0,0 +1,39 @@
type value =
| Int of int
| Double of float
| String of string
| Nil
| Cons of value * value
| Symbol of string
| Closure of int * value ref list
| Native of int (* This is basically a syscall, each ID represents a primitive operation
that should have a well-defined effect. These will be further detailed
in the language documentation
*)
type instr =
| Constant of int
| LoadLocal of int
| LoadGlobal of int
| StoreLocal of int
| StoreGlobal of int
| MakeCons
| Pop (* discards top of stack *)
| Apply
| MakeClosure of int
| Jump of int
| JumpF of int (* jump if false. *)
| End
| NOOP
type vm_state = {
mutable i : int;
instrs : instr array;
globals : value array;
constants : value array;
mutable env : value ref list;
mutable stack : value list
}
(* TODO: add facilities to print the VM state in case of errors. *)
+12 -37
View File
@@ -1,37 +1,7 @@
type value = module Types = Types
| Int of int open Types
| Double of float
| String of string
| Nil
| Cons of value * value
| Symbol of string
| Closure of int * value ref list
| NativeClosure of (value -> value)
type instr =
| Constant of int
| LoadLocal of int
| LoadGlobal of int
| StoreLocal of int
| StoreGlobal of int
| Pop (* discards top of stack *)
| Apply
| MakeClosure of int
| Jump of int
| JumpF of int (* jump if false. *)
| End
type vm_state = {
mutable i : int;
instrs : instr array;
globals : value array;
constants : value array;
mutable env : value ref list;
mutable stack : value list
}
(* TODO: add facilities to print the VM state in case of errors. *)
let do_local state i f = let do_local state i f =
match List.nth_opt state.env i with match List.nth_opt state.env i with
@@ -53,17 +23,17 @@ let push state v =
let rec do_apply state = let rec do_apply state =
let cur_env = state.env in let cur_env = state.env in
let cur_i = state.i in let cur_i = state.i in
let f = pop_one state in
let arg = pop_one state in let arg = pop_one state in
let f = pop_one state in
match f with match f with
| Closure (x, e) -> | Closure (x, e) ->
state.env <- e; state.env <- ref arg :: e;
state.i <- x; state.i <- x;
interpret state; interpret state;
state.env <- cur_env; state.env <- cur_env;
state.i <- cur_i state.i <- cur_i
| NativeClosure f -> | Native x ->
push state (f arg) push state (Native.table.(x) arg)
| _ -> failwith "Cannot apply non-closure object" | _ -> failwith "Cannot apply non-closure object"
and interpret state = and interpret state =
@@ -75,6 +45,10 @@ and interpret state =
| LoadGlobal x -> push state state.globals.(x) ; interpret state | LoadGlobal x -> push state state.globals.(x) ; interpret state
| StoreLocal x -> set_local state x (pop_one state) ; interpret state | StoreLocal x -> set_local state x (pop_one state) ; interpret state
| StoreGlobal x -> Array.set state.globals x (pop_one state) ; interpret state | StoreGlobal x -> Array.set state.globals x (pop_one state) ; interpret state
| MakeCons ->
let cdr = pop_one state in
let car = pop_one state in
push state (Cons (car, cdr))
| Pop -> ignore (pop_one state) ; interpret state | Pop -> ignore (pop_one state) ; interpret state
| Apply -> do_apply state ; interpret state | Apply -> do_apply state ; interpret state
| MakeClosure x -> push state (Closure (x, state.env)); interpret state | MakeClosure x -> push state (Closure (x, state.env)); interpret state
@@ -83,5 +57,6 @@ and interpret state =
(match (pop_one state) with (match (pop_one state) with
| Nil -> state.i <- target | Nil -> state.i <- target
| _ -> ()); interpret state | _ -> ()); interpret state
| End -> ()) | End -> ()
| NOOP -> interpret state)