From adbf083c3dacf5809a985111ac00afa6cb153e65 Mon Sep 17 00:00:00 2001 From: haxala1r Date: Mon, 8 Dec 2025 22:25:36 +0300 Subject: [PATCH] Reorganized project --- .envrc | 1 + .gitignore | 2 + flake.nix | 23 ++++ lib/compiler/compilation.ml | 25 +++++ lib/compiler/dune | 3 + lib/interpreter/ast.ml | 142 +++++++++++++++++++++++++ lib/interpreter/dune | 4 + lib/interpreter/env.ml | 38 +++++++ lib/interpreter/eval.ml | 76 ++++++++++++++ lib/interpreter/stdlib.ml | 204 ++++++++++++++++++++++++++++++++++++ lib/parser/ast.ml | 9 ++ lib/parser/dune | 7 ++ lib/parser/lex.mll | 35 +++++++ lib/parser/parse.mly | 36 +++++++ lib/parser/parser.ml | 16 +++ 15 files changed, 621 insertions(+) create mode 100644 .envrc create mode 100644 flake.nix create mode 100644 lib/compiler/compilation.ml create mode 100644 lib/compiler/dune create mode 100644 lib/interpreter/ast.ml create mode 100644 lib/interpreter/dune create mode 100644 lib/interpreter/env.ml create mode 100644 lib/interpreter/eval.ml create mode 100644 lib/interpreter/stdlib.ml create mode 100644 lib/parser/ast.ml create mode 100644 lib/parser/dune create mode 100644 lib/parser/lex.mll create mode 100644 lib/parser/parse.mly create mode 100644 lib/parser/parser.ml diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..3550a30 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.gitignore b/.gitignore index b59df2a..7476993 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ _build *~ +.direnv +result diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..4f3cbec --- /dev/null +++ b/flake.nix @@ -0,0 +1,23 @@ +{ + description = "a lisp interpreter/compiler in ocaml"; + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + }; + + outputs = {self, nixpkgs}: + let + pkgs = nixpkgs.legacyPackages.x86_64-linux; + in + { + packages.x86_64-linux.default = pkgs.ocamlPackages.buildDunePackage { + pname = "ollisp"; + version = "0.0.1"; + src = pkgs.lib.cleanSource ./.; + preBuildPhase = "ls -R"; + nativeBuildInputs = with pkgs; [ + ocamlPackages.menhir + ]; + + }; + }; +} diff --git a/lib/compiler/compilation.ml b/lib/compiler/compilation.ml new file mode 100644 index 0000000..4fea293 --- /dev/null +++ b/lib/compiler/compilation.ml @@ -0,0 +1,25 @@ + +open Parser.Ast;; + +(* This type represents an intermediate step between the AST and opcodes in our + compiler. We need this extra step to resolve addresses, e.g. how do you know + what exact address an if expression needs to jump to before you compile it? + you don't, you just keep a symbolic label there, resolve later. + *) +type intermediate_opcode = + | ISelect of string * string + | ILDF of string + | ILD of int (* an index into the constant table *) + | INil + | IRet + | IAdd + | IJoin + | ILabel of string (* does not emit any byte code *) + + + + +(* TODO: Complete *) +let (compile : lisp_ast -> intermediate_opcode list) = function + | LInt x -> [ILD x] + | _ -> [];; diff --git a/lib/compiler/dune b/lib/compiler/dune new file mode 100644 index 0000000..b15ac79 --- /dev/null +++ b/lib/compiler/dune @@ -0,0 +1,3 @@ +(library + (name compiler) + (libraries parser)) diff --git a/lib/interpreter/ast.ml b/lib/interpreter/ast.ml new file mode 100644 index 0000000..5135236 --- /dev/null +++ b/lib/interpreter/ast.ml @@ -0,0 +1,142 @@ + +(* This is different from the lisp_ast data returned by the parser! + We will first need to translate that into this in order to use it. + This representation includes things that can only occur during runtime, + like the various kinds of functions and macros. + + Additionally, since this is an interpreter, macros tend to be a little + awkward in that they behave exactly like the macro gets expanded just + before the result gets executed. This is different from the compiled + behaviour where the macro is evaluated at compile time. + + Though of course, with the dynamic nature of lisp, and its capability + to compile more code at runtime, there will naturally be complications. + *) +type lisp_val = + | LInt of int + | LDouble of float + | LCons of lisp_val * lisp_val + | LNil + | LSymbol of string + | LString of string + + (* a builtin function is expressed as a name and the ocaml function + that performs the operation. The function should take a list of arguments. + generally, builtin functions should handle their arguments directly, + and eval forms in the environment as necessary. *) + | LBuiltinFunction of string * (environment -> lisp_val -> lisp_val) + | LBuiltinSpecial of string * (environment -> lisp_val -> lisp_val) + (* a function is a name, captured environment, a parameter list, and function body. *) + | LFunction of string * environment * lisp_val * lisp_val + | LLambda of environment * lisp_val * lisp_val + (* a macro is exactly the same as a function, with the distinction + that it receives all of its arguments completely unevaluated + *) + | LMacro of string * environment * lisp_val * lisp_val + | LUnnamedMacro of environment * lisp_val * lisp_val + | LQuoted of lisp_val +(* the environment type needs to be defined here, as it is mutually + recursive with lisp_val *) +and environment = (string, lisp_val) Hashtbl.t list + +(* It is clear that we need some primitives for working with the lisp + data structures. + + For example, the LCons and LNil values, together, form a linked list. + This is the intended form of all source code in lisp, yet because + we are using our own implementation of a linked list instead of + ocaml's List, we can not use its many functions. + + It may be tempting to switch to a different implementation. + Remember however, that classic lisp semantics allow for the + CDR component of a cons cell (the part that would point to the + next member) to be of a type other than the list itself. + *) + +let reverse vs = + let rec aux prev = function + | LNil -> prev + | LCons (v, next) -> aux (LCons (v, prev)) next + | _ -> invalid_arg "cannot reverse non-list!" + in aux LNil vs + +let map f = + let rec aux accum = function + | LNil -> reverse accum + | LCons (v, next) -> aux (LCons (f v, accum)) next + | _ -> invalid_arg "cannot map over non-list!" + in aux LNil + +let reduce init f = + let rec aux accum = function + | LNil -> accum + | LCons (v, next) -> aux (f accum v) next + | _ -> invalid_arg "cannot reduce over non-list!" + in aux init + +let rec dbg_print_list = + let pf = Printf.sprintf in + function + | LCons (v, LNil) -> pf "%s" (dbg_print_one v) + | LCons (v, rest) -> (pf "%s " (dbg_print_one v)) ^ (dbg_print_list rest) + | v -> pf ". %s" (dbg_print_one v) + +and dbg_print_one v = + let pf = Printf.sprintf in + match v with + | LInt x -> pf "" x + | LSymbol s -> pf "" s + | LString s -> pf "" s + | LNil -> pf "" + | LCons _ -> pf "" (dbg_print_list v) + | LDouble d -> pf "" d + | LBuiltinSpecial (name, _) + | LBuiltinFunction (name, _) -> pf "" name + | LLambda (_, args, _) -> pf "" + (dbg_print_one args) + | LFunction (name, _, args, _) -> pf "" + name (dbg_print_one args) + | LUnnamedMacro (_, args, _) -> pf "" + (dbg_print_one args) + | LMacro (name, _, args, _) -> pf "" + name (dbg_print_one args) + | LQuoted v -> pf "" (dbg_print_one v) + (*| _ -> ""*) + +let rec pretty_print_one v = + let pf = Printf.sprintf in + match v with + | LInt x -> pf "%d" x + | LSymbol s -> pf "%s" s + | LString s -> pf "\"%s\"" s + | LNil -> pf "()" + | LCons (a, b) -> pf "(%s)" (dbg_print_list (LCons (a,b))) + | LDouble d -> pf "%f" d + | LQuoted v -> pf "'%s" (pretty_print_one v) + | LBuiltinSpecial _ + | LBuiltinFunction _ + | LLambda _ + | LFunction _ + | LUnnamedMacro _ + | LMacro _ -> dbg_print_one v + +let pretty_print_all vs = + let pr v = Printf.printf "%s\n" (pretty_print_one v) in + List.iter pr vs + +let dbg_print_all vs = + let pr v = Printf.printf "%s\n" (dbg_print_one v) in + List.iter pr vs + + +let rec convert_one = function + | Parser.Ast.LInt x -> LInt x + | Parser.Ast.LDouble x -> LDouble x + | Parser.Ast.LNil -> LNil + | Parser.Ast.LString s -> LString s + | Parser.Ast.LSymbol s -> LSymbol s + | Parser.Ast.LCons (a, b) -> LCons (convert_one a, convert_one b) + + +let read_from_str s = + List.map convert_one (Parser.parse_str s) diff --git a/lib/interpreter/dune b/lib/interpreter/dune new file mode 100644 index 0000000..ee90267 --- /dev/null +++ b/lib/interpreter/dune @@ -0,0 +1,4 @@ +(library + (name interpreter) + (libraries parser) + (package ollisp)) diff --git a/lib/interpreter/env.ml b/lib/interpreter/env.ml new file mode 100644 index 0000000..8230dba --- /dev/null +++ b/lib/interpreter/env.ml @@ -0,0 +1,38 @@ +open Ast +(* the type `environment` is defined in Ast *) + +let default_env: environment = [Hashtbl.create 1024];; + +let copy (env : environment) : environment = + List.map Hashtbl.copy env + +let make_env () = copy default_env + +let new_lexical (env : environment) : environment = + let h = Hashtbl.create 16 in + h :: env + +let set_local (env : environment) (s : string) (v : lisp_val) : unit = + match env with + | [] -> () + | e1 :: _ -> Hashtbl.replace e1 s v + +let rec update (env : environment) s v = + match env with + | [] -> () + | e1 :: erest -> + match Hashtbl.find_opt e1 s with + | None -> update erest s v + | Some _ -> Hashtbl.replace e1 s v + +let rec get_root (env : environment) = + match env with + | [] -> raise (Invalid_argument "Empty environment passed to env_root!") + | e :: [] -> e + | _ :: t -> get_root t + +let set_global (env : environment) s v = + Hashtbl.replace (get_root env) s v + +let set_default s v = + set_global default_env s v diff --git a/lib/interpreter/eval.ml b/lib/interpreter/eval.ml new file mode 100644 index 0000000..e130424 --- /dev/null +++ b/lib/interpreter/eval.ml @@ -0,0 +1,76 @@ +open Ast;; + + +(* the type annotations are unnecessary, but help constrain us from a +potentially more general function here *) +let rec eval_sym (env: environment) (s: string) = + match env with + | [] -> raise (Invalid_argument (Printf.sprintf "eval_sym: symbol %s has no value in current scope" s)) + | e :: rest -> + match Hashtbl.find_opt e s with + | None -> eval_sym rest s + | Some v -> v + +let rec eval_one env = function + | LSymbol s -> eval_sym env s + | LCons (func, args) -> eval_call env (eval_one env func) args + | LQuoted v -> v + | v -> v (* All other forms are self-evaluating *) + +(* Evaluate a list of values, without evaluating the resulting +function or macro call. Since macros and functions inherently +look similar, they share a lot of code, which is extracted here *) +and eval_list env l = + match l with + | LNil -> LNil + | LCons (a, b) -> LCons (eval_one env a, eval_list env b) + | _ -> raise (Invalid_argument "eval_list: cannot process non-list") + +and eval_body env body = + match body with + | LNil -> LNil + | LCons (form, LNil) -> eval_one env form + | LCons (form, next) -> ignore (eval_one env form); eval_body env next + | _ -> LNil + +and bind_args env = function + | (LNil, LNil) -> () + | (LSymbol s, v) -> Env.set_local env s v + | (LCons (LSymbol hl, tl), LCons (ha, ta)) -> Env.set_local env hl ha; bind_args env (tl, ta) + | _ -> invalid_arg "cannot bind argument list for function" + +and eval_apply args = function + | LLambda (e, l, b) + | LFunction (_, e, l, b) -> + let lexical_env = Env.new_lexical e in + bind_args lexical_env (l, args); + eval_body lexical_env b + | LUnnamedMacro (e, l, b) + | LMacro (_, e, l, b) -> + let lexical_env = Env.new_lexical e in + bind_args lexical_env (l, args); + eval_body lexical_env b + | v -> + invalid_arg ("Non-macro non-function value passed to eval_apply " + ^ dbg_print_one v) + +and eval_call env func args = + match func with + | LBuiltinSpecial (_, f) -> f env args + | LBuiltinFunction (_, f) -> f env (eval_list env args) + (* The function calls don't happen in the calling environment, + so it makes no sense to pass env to a call. *) + | LLambda _ + | LFunction _ -> eval_apply (eval_list env args) func + (* Macros are the same, they just return code that *will* be evaluated + in the calling environment *) + | LUnnamedMacro _ + | LMacro _ -> eval_one env (eval_apply args func) + | v -> raise (Invalid_argument + (Printf.sprintf "eval_apply: cannot call non-function object %s" (dbg_print_one v))) + +let eval_all env vs = + let ev v = eval_one env v in + List.map ev vs;; + + diff --git a/lib/interpreter/stdlib.ml b/lib/interpreter/stdlib.ml new file mode 100644 index 0000000..5234b77 --- /dev/null +++ b/lib/interpreter/stdlib.ml @@ -0,0 +1,204 @@ +open Ast;; + +(* I feel like the more I get into functional programming, the more insane my code + becomes. What the fuck is this? why do I have a set of functions that combine + binary operators over an arbitrarily long list? I have like. 4 operators. None + of this matters. + + But it's just so... beautiful. + *) +let mathop_do_once int_op float_op = function + | (LDouble v1, LDouble v2) -> LDouble (float_op v1 v2) + | (LDouble v1, LInt v2) -> LDouble (float_op v1 (float_of_int v2)) + | (LInt v1, LDouble v2) -> LDouble (float_op (float_of_int v1) v2) + | (LInt v1, LInt v2) -> LInt (int_op v1 v2) + | _ -> invalid_arg "invalid arguments to mathematical operator" + +let mathop_do_once_curried int_op float_op = + let f = mathop_do_once int_op float_op in + fun x -> fun y -> f (x, y) + +let mathop_reduce fi ff init vs = + let curried = mathop_do_once_curried fi ff in + reduce init curried vs + +let cast_int_to_double = function + | LInt x -> LDouble (float x) + | LDouble x -> LDouble x + | _ -> invalid_arg "can't cast_int_to_double!" + +let add _ vs = + mathop_reduce (+) (+.) (LInt 0) vs +let sub _ = function + | LCons (x, LNil) -> ((mathop_do_once (-) (-.)) (LInt 0, x)) + | LCons (x, rest) -> mathop_reduce (-) (-.) x rest + | _ -> invalid_arg "invalid argument list passed to (-)" +let mul _ vs = + mathop_reduce ( * ) ( *. ) (LInt 1) vs +let div _ vs = + let div_one = mathop_do_once ( / ) ( /. ) in + match vs with + (* (/ x) is equal to 1 / x *) + | LCons (x, LNil) -> div_one (LDouble 1., cast_int_to_double x) + | LCons (x, LCons (y, LNil)) -> div_one (cast_int_to_double x, y) + | _ -> invalid_arg "invalid argument list passed to (/)" + +let rem _ = function + | LCons (x, LCons (y, LNil)) -> + mathop_do_once (mod) (mod_float) (cast_int_to_double x, cast_int_to_double y) + | _ -> invalid_arg "invalid argument list passed to (rem)" + + +let car _ = function + | LCons (a, _) -> a + | _ -> invalid_arg "car: non-cons" +let cdr _ = function + | LCons (_, d) -> d + | _ -> invalid_arg "cdr: non-cons" +let cons _ a b = LCons (a, b) +let lisp_list _ vs = vs + +(* builtin function that updates an existing binding *) +let lisp_set env sym v = + match sym with + | LSymbol s -> Env.update env s v; v + | _ -> invalid_arg ("cannot set non-symbol " ^ dbg_print_one sym) +let lambda env = function + | LCons (l, body) -> + LLambda (env, l, body) + | args -> invalid_arg ("invalid args to fn! " ^ (dbg_print_one args)) +let defn env = function + | LCons (LSymbol s, LCons (l, body)) -> + let f = LFunction (s, env, l, body) in + Env.set_global env s f; f + | args -> invalid_arg ("cannot define function! " ^ (dbg_print_one args)) + +let lambda_macro env = function + | LCons (l, body) -> LUnnamedMacro (env, l, body) + | args -> invalid_arg ("invalid args to fn-macro! " ^ (dbg_print_one args)) +let defmacro env = function + | LCons (LSymbol s, LCons (l, body)) -> + let f = LMacro (s, env, l, body) in + Env.set_global env s f; f + | args -> invalid_arg ("cannot define macro! " ^ (dbg_print_one args)) + + +let lisp_not _ = function + | LCons (LNil, LNil) -> LSymbol "t" + | _ -> LNil;; + +(* This only creates a *local* binding, contained to the body given. *) +let bind_local env = function + | LCons (LSymbol s, LCons (v, body)) -> + let e = Env.new_lexical env in + Env.set_local e s (Eval.eval_one env v); + Eval.eval_body e body + | _ -> invalid_arg "invalid argument to bind-local" + +(* special form that creates a global binding *) +let lisp_define env = function + | LCons (LSymbol s, LCons (v, LNil)) -> + let evaluated = Eval.eval_one env v in + Env.set_global env s evaluated; + evaluated + | _ -> invalid_arg "invalid args to def" + +let lisp_if env = function + | LCons (cond, LCons (if_true, LNil)) -> + (match Eval.eval_one env cond with + | LNil -> LNil + | _ -> Eval.eval_one env if_true) + | LCons (cond, LCons (if_true, LCons (if_false, LNil))) -> + (match Eval.eval_one env cond with + | LNil -> Eval.eval_one env if_false + | _ -> Eval.eval_one env if_true) + | _ -> invalid_arg "invalid argument list passed to if!" + + +open Env;; + + +let bf s f = s, LBuiltinFunction (s, f) +let bf1 s f = + let aux e = function + | LCons (v, LNil) -> f e v + | _ -> invalid_arg ("invalid argument to " ^ s) + in bf s aux +let bf2 s f = + let aux e = function + | LCons (v1, LCons (v2, LNil)) -> f e v1 v2 + | _ -> invalid_arg ("invalid argument to " ^ s) + in bf s aux + +let sp s f = s, LBuiltinSpecial (s, f) +let sp1 s f = + let aux e = function + | LCons (v, LNil) -> f e v + | _ -> invalid_arg ("invalid argument to " ^ s) + in sp s aux +let sp2 s f = + let aux e = function + | LCons (v1, LCons (v2, LNil)) -> f e v1 v2 + | _ -> invalid_arg ("invalid argument to " ^ s) + in sp s aux + + +let add_builtins bs = + List.iter (fun (s, f) -> set_default s f) bs + +(* +(def defn + (fn-macro (name lm . body) + (list 'def name (cons 'fn (cons lm body))))) +(def defmacro + (fn-macro (name lm . body) + (list 'def name (cons 'fn-macro (cons lm body))))) + *) + +let init_script = +" +(defmacro setq (sym val) + (list 'set (list 'quote sym) val)) +(defmacro letfn (sym fun . body) + (cons 'let-one (cons sym (cons '() (cons (list 'setq sym fun) body))))) + +(defn mapcar (f l) + (if l)) +(defn filter (f l) + (letfn helper + (fn (l acc) + (if (nil? l) acc (helper (cdr l) (if (f (car l)) (cons (car l) acc) acc)))) + (helper l '()))) +";; + +let init_default_env () = + add_builtins [ + bf "+" add; bf "-" sub; + bf "*" mul; bf "/" div; + bf1 "car" car; + bf1 "cdr" cdr; + bf2 "cons" cons; + bf "rem" rem; + bf2 "set" lisp_set; + bf "list" lisp_list; + bf "nil?" lisp_not; + bf "not" lisp_not; + + sp "fn" lambda; + sp "defn" defn; + sp "fn-macro" lambda_macro; + sp "defmacro" defmacro; + sp "let-one" bind_local; + sp "def" lisp_define; + sp1 "quote" (fun _ x -> x); + sp "if" lisp_if; + ]; + + (*let () = add_builtin "print" lisp_prin *) + + (* I know this looks insane. please trust me. + Idea: maybe put this in a file instead of putting + literally the entire standard library in a constant string + *) + ignore (Eval.eval_all default_env (read_from_str init_script)); + () diff --git a/lib/parser/ast.ml b/lib/parser/ast.ml new file mode 100644 index 0000000..b122c7e --- /dev/null +++ b/lib/parser/ast.ml @@ -0,0 +1,9 @@ + + +type lisp_ast = + | LInt of int + | LDouble of float + | LSymbol of string + | LString of string + | LNil + | LCons of lisp_ast * lisp_ast diff --git a/lib/parser/dune b/lib/parser/dune new file mode 100644 index 0000000..1cd2e2b --- /dev/null +++ b/lib/parser/dune @@ -0,0 +1,7 @@ +(library + (name parser) + (modules parser lex parse ast) + (package ollisp)) + +(menhir (modules parse)) +(ocamllex lex) diff --git a/lib/parser/lex.mll b/lib/parser/lex.mll new file mode 100644 index 0000000..abff6c9 --- /dev/null +++ b/lib/parser/lex.mll @@ -0,0 +1,35 @@ +{ +open Lexing +open Parse +exception SyntaxError of string + +let strip_quotes s = String.sub s 1 (String.length s - 2);; +} + +let digit = ['0'-'9'] +let number_sign = '-' | '+' +let int = number_sign? digit+ +let double = digit* '.' digit+ | digit+ '.' digit* + +let white = [' ' '\t']+ +let newline = '\r' | '\n' | "\r\n" + +let sym_char = ['a'-'z' 'A'-'Z' '!' '\\' '+' '-' '*' '/' '_' '?'] +let sym = sym_char sym_char* + +let str = '"' [^'"']* '"' + +rule read = + parse + | white { read lexbuf } + | newline { new_line lexbuf; read lexbuf} + | int { INT (int_of_string (Lexing.lexeme lexbuf))} + | double { DOUBLE (float_of_string (Lexing.lexeme lexbuf))} + | sym { SYM (Lexing.lexeme lexbuf)} + | str { STR (strip_quotes (Lexing.lexeme lexbuf))} + | '(' { LPAREN } + | ')' { RPAREN } + | '\'' { QUOTE } + | '.' { DOT } + | _ { raise (SyntaxError ("Unexpected char: " ^ Lexing.lexeme lexbuf))} + | eof { EOF } diff --git a/lib/parser/parse.mly b/lib/parser/parse.mly new file mode 100644 index 0000000..91969bc --- /dev/null +++ b/lib/parser/parse.mly @@ -0,0 +1,36 @@ +%{ + open Ast +%} + +%token INT +%token DOUBLE +%token SYM +%token STR +%token LPAREN +%token RPAREN +%token QUOTE +%token DOT +%token EOF + +%start prog +%% + +prog: + | EOF { None } + | e = expr { Some e } +; + +expr: + | i = INT { LInt i } + | d = DOUBLE { LDouble d} + | s = SYM { LSymbol s } + | s = STR { LString (String.uppercase_ascii s) } + | LPAREN; l = lisp_list_rest { l } + | QUOTE; e = expr { LCons (LSymbol "quote", LCons (e, LNil)) } +; + +lisp_list_rest: + | RPAREN { LNil } + | DOT; e = expr; RPAREN { e } + | e = expr; lr = lisp_list_rest { LCons (e, lr) } +; diff --git a/lib/parser/parser.ml b/lib/parser/parser.ml new file mode 100644 index 0000000..83beecf --- /dev/null +++ b/lib/parser/parser.ml @@ -0,0 +1,16 @@ +let parse_one lb = Parse.prog (Lex.read) lb + +let parse lb = + let rec helper () = + match parse_one lb with + | None -> [] + | Some (t) -> t :: helper () + in + helper () + +let parse_str s = + parse (Lexing.from_string s) + + +module Ast = Ast +module Parse = Parse