Reorganized project

This commit is contained in:
2025-12-08 22:25:36 +03:00
parent 3f0a860a03
commit 4faf309752
15 changed files with 621 additions and 0 deletions

1
.envrc Normal file
View File

@@ -0,0 +1 @@
use flake

2
.gitignore vendored
View File

@@ -1,2 +1,4 @@
_build
*~
.direnv
result

23
flake.nix Normal file
View File

@@ -0,0 +1,23 @@
{
description = "a lisp interpreter/compiler in ocaml";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
};
outputs = {self, nixpkgs}:
let
pkgs = nixpkgs.legacyPackages.x86_64-linux;
in
{
packages.x86_64-linux.default = pkgs.ocamlPackages.buildDunePackage {
pname = "ollisp";
version = "0.0.1";
src = pkgs.lib.cleanSource ./.;
preBuildPhase = "ls -R";
nativeBuildInputs = with pkgs; [
ocamlPackages.menhir
];
};
};
}

View File

@@ -0,0 +1,25 @@
open Parser.Ast;;
(* This type represents an intermediate step between the AST and opcodes in our
compiler. We need this extra step to resolve addresses, e.g. how do you know
what exact address an if expression needs to jump to before you compile it?
you don't, you just keep a symbolic label there, resolve later.
*)
type intermediate_opcode =
| ISelect of string * string
| ILDF of string
| ILD of int (* an index into the constant table *)
| INil
| IRet
| IAdd
| IJoin
| ILabel of string (* does not emit any byte code *)
(* TODO: Complete *)
let (compile : lisp_ast -> intermediate_opcode list) = function
| LInt x -> [ILD x]
| _ -> [];;

3
lib/compiler/dune Normal file
View File

@@ -0,0 +1,3 @@
(library
(name compiler)
(libraries parser))

142
lib/interpreter/ast.ml Normal file
View File

@@ -0,0 +1,142 @@
(* This is different from the lisp_ast data returned by the parser!
We will first need to translate that into this in order to use it.
This representation includes things that can only occur during runtime,
like the various kinds of functions and macros.
Additionally, since this is an interpreter, macros tend to be a little
awkward in that they behave exactly like the macro gets expanded just
before the result gets executed. This is different from the compiled
behaviour where the macro is evaluated at compile time.
Though of course, with the dynamic nature of lisp, and its capability
to compile more code at runtime, there will naturally be complications.
*)
type lisp_val =
| LInt of int
| LDouble of float
| LCons of lisp_val * lisp_val
| LNil
| LSymbol of string
| LString of string
(* a builtin function is expressed as a name and the ocaml function
that performs the operation. The function should take a list of arguments.
generally, builtin functions should handle their arguments directly,
and eval forms in the environment as necessary. *)
| LBuiltinFunction of string * (environment -> lisp_val -> lisp_val)
| LBuiltinSpecial of string * (environment -> lisp_val -> lisp_val)
(* a function is a name, captured environment, a parameter list, and function body. *)
| LFunction of string * environment * lisp_val * lisp_val
| LLambda of environment * lisp_val * lisp_val
(* a macro is exactly the same as a function, with the distinction
that it receives all of its arguments completely unevaluated
*)
| LMacro of string * environment * lisp_val * lisp_val
| LUnnamedMacro of environment * lisp_val * lisp_val
| LQuoted of lisp_val
(* the environment type needs to be defined here, as it is mutually
recursive with lisp_val *)
and environment = (string, lisp_val) Hashtbl.t list
(* It is clear that we need some primitives for working with the lisp
data structures.
For example, the LCons and LNil values, together, form a linked list.
This is the intended form of all source code in lisp, yet because
we are using our own implementation of a linked list instead of
ocaml's List, we can not use its many functions.
It may be tempting to switch to a different implementation.
Remember however, that classic lisp semantics allow for the
CDR component of a cons cell (the part that would point to the
next member) to be of a type other than the list itself.
*)
let reverse vs =
let rec aux prev = function
| LNil -> prev
| LCons (v, next) -> aux (LCons (v, prev)) next
| _ -> invalid_arg "cannot reverse non-list!"
in aux LNil vs
let map f =
let rec aux accum = function
| LNil -> reverse accum
| LCons (v, next) -> aux (LCons (f v, accum)) next
| _ -> invalid_arg "cannot map over non-list!"
in aux LNil
let reduce init f =
let rec aux accum = function
| LNil -> accum
| LCons (v, next) -> aux (f accum v) next
| _ -> invalid_arg "cannot reduce over non-list!"
in aux init
let rec dbg_print_list =
let pf = Printf.sprintf in
function
| LCons (v, LNil) -> pf "%s" (dbg_print_one v)
| LCons (v, rest) -> (pf "%s " (dbg_print_one v)) ^ (dbg_print_list rest)
| v -> pf ". %s" (dbg_print_one v)
and dbg_print_one v =
let pf = Printf.sprintf in
match v with
| LInt x -> pf "<int: %d>" x
| LSymbol s -> pf "<symbol: '%s'>" s
| LString s -> pf "<string: '%s'>" s
| LNil -> pf "<nil>"
| LCons _ -> pf "<list: (%s)>" (dbg_print_list v)
| LDouble d -> pf "<double: %f>" d
| LBuiltinSpecial (name, _)
| LBuiltinFunction (name, _) -> pf "<builtin: %s>" name
| LLambda (_, args, _) -> pf "<unnamed function, lambda-list: %s>"
(dbg_print_one args)
| LFunction (name, _, args, _) -> pf "<function: '%s' lambda-list: %s>"
name (dbg_print_one args)
| LUnnamedMacro (_, args, _) -> pf "<unnamed macro, lambda-list: %s>"
(dbg_print_one args)
| LMacro (name, _, args, _) -> pf "<macro '%s' lambda-list: %s>"
name (dbg_print_one args)
| LQuoted v -> pf "<quote: %s>" (dbg_print_one v)
(*| _ -> "<Something else>"*)
let rec pretty_print_one v =
let pf = Printf.sprintf in
match v with
| LInt x -> pf "%d" x
| LSymbol s -> pf "%s" s
| LString s -> pf "\"%s\"" s
| LNil -> pf "()"
| LCons (a, b) -> pf "(%s)" (dbg_print_list (LCons (a,b)))
| LDouble d -> pf "%f" d
| LQuoted v -> pf "'%s" (pretty_print_one v)
| LBuiltinSpecial _
| LBuiltinFunction _
| LLambda _
| LFunction _
| LUnnamedMacro _
| LMacro _ -> dbg_print_one v
let pretty_print_all vs =
let pr v = Printf.printf "%s\n" (pretty_print_one v) in
List.iter pr vs
let dbg_print_all vs =
let pr v = Printf.printf "%s\n" (dbg_print_one v) in
List.iter pr vs
let rec convert_one = function
| Parser.Ast.LInt x -> LInt x
| Parser.Ast.LDouble x -> LDouble x
| Parser.Ast.LNil -> LNil
| Parser.Ast.LString s -> LString s
| Parser.Ast.LSymbol s -> LSymbol s
| Parser.Ast.LCons (a, b) -> LCons (convert_one a, convert_one b)
let read_from_str s =
List.map convert_one (Parser.parse_str s)

4
lib/interpreter/dune Normal file
View File

@@ -0,0 +1,4 @@
(library
(name interpreter)
(libraries parser)
(package ollisp))

38
lib/interpreter/env.ml Normal file
View File

@@ -0,0 +1,38 @@
open Ast
(* the type `environment` is defined in Ast *)
let default_env: environment = [Hashtbl.create 1024];;
let copy (env : environment) : environment =
List.map Hashtbl.copy env
let make_env () = copy default_env
let new_lexical (env : environment) : environment =
let h = Hashtbl.create 16 in
h :: env
let set_local (env : environment) (s : string) (v : lisp_val) : unit =
match env with
| [] -> ()
| e1 :: _ -> Hashtbl.replace e1 s v
let rec update (env : environment) s v =
match env with
| [] -> ()
| e1 :: erest ->
match Hashtbl.find_opt e1 s with
| None -> update erest s v
| Some _ -> Hashtbl.replace e1 s v
let rec get_root (env : environment) =
match env with
| [] -> raise (Invalid_argument "Empty environment passed to env_root!")
| e :: [] -> e
| _ :: t -> get_root t
let set_global (env : environment) s v =
Hashtbl.replace (get_root env) s v
let set_default s v =
set_global default_env s v

76
lib/interpreter/eval.ml Normal file
View File

@@ -0,0 +1,76 @@
open Ast;;
(* the type annotations are unnecessary, but help constrain us from a
potentially more general function here *)
let rec eval_sym (env: environment) (s: string) =
match env with
| [] -> raise (Invalid_argument (Printf.sprintf "eval_sym: symbol %s has no value in current scope" s))
| e :: rest ->
match Hashtbl.find_opt e s with
| None -> eval_sym rest s
| Some v -> v
let rec eval_one env = function
| LSymbol s -> eval_sym env s
| LCons (func, args) -> eval_call env (eval_one env func) args
| LQuoted v -> v
| v -> v (* All other forms are self-evaluating *)
(* Evaluate a list of values, without evaluating the resulting
function or macro call. Since macros and functions inherently
look similar, they share a lot of code, which is extracted here *)
and eval_list env l =
match l with
| LNil -> LNil
| LCons (a, b) -> LCons (eval_one env a, eval_list env b)
| _ -> raise (Invalid_argument "eval_list: cannot process non-list")
and eval_body env body =
match body with
| LNil -> LNil
| LCons (form, LNil) -> eval_one env form
| LCons (form, next) -> ignore (eval_one env form); eval_body env next
| _ -> LNil
and bind_args env = function
| (LNil, LNil) -> ()
| (LSymbol s, v) -> Env.set_local env s v
| (LCons (LSymbol hl, tl), LCons (ha, ta)) -> Env.set_local env hl ha; bind_args env (tl, ta)
| _ -> invalid_arg "cannot bind argument list for function"
and eval_apply args = function
| LLambda (e, l, b)
| LFunction (_, e, l, b) ->
let lexical_env = Env.new_lexical e in
bind_args lexical_env (l, args);
eval_body lexical_env b
| LUnnamedMacro (e, l, b)
| LMacro (_, e, l, b) ->
let lexical_env = Env.new_lexical e in
bind_args lexical_env (l, args);
eval_body lexical_env b
| v ->
invalid_arg ("Non-macro non-function value passed to eval_apply "
^ dbg_print_one v)
and eval_call env func args =
match func with
| LBuiltinSpecial (_, f) -> f env args
| LBuiltinFunction (_, f) -> f env (eval_list env args)
(* The function calls don't happen in the calling environment,
so it makes no sense to pass env to a call. *)
| LLambda _
| LFunction _ -> eval_apply (eval_list env args) func
(* Macros are the same, they just return code that *will* be evaluated
in the calling environment *)
| LUnnamedMacro _
| LMacro _ -> eval_one env (eval_apply args func)
| v -> raise (Invalid_argument
(Printf.sprintf "eval_apply: cannot call non-function object %s" (dbg_print_one v)))
let eval_all env vs =
let ev v = eval_one env v in
List.map ev vs;;

204
lib/interpreter/stdlib.ml Normal file
View File

@@ -0,0 +1,204 @@
open Ast;;
(* I feel like the more I get into functional programming, the more insane my code
becomes. What the fuck is this? why do I have a set of functions that combine
binary operators over an arbitrarily long list? I have like. 4 operators. None
of this matters.
But it's just so... beautiful.
*)
let mathop_do_once int_op float_op = function
| (LDouble v1, LDouble v2) -> LDouble (float_op v1 v2)
| (LDouble v1, LInt v2) -> LDouble (float_op v1 (float_of_int v2))
| (LInt v1, LDouble v2) -> LDouble (float_op (float_of_int v1) v2)
| (LInt v1, LInt v2) -> LInt (int_op v1 v2)
| _ -> invalid_arg "invalid arguments to mathematical operator"
let mathop_do_once_curried int_op float_op =
let f = mathop_do_once int_op float_op in
fun x -> fun y -> f (x, y)
let mathop_reduce fi ff init vs =
let curried = mathop_do_once_curried fi ff in
reduce init curried vs
let cast_int_to_double = function
| LInt x -> LDouble (float x)
| LDouble x -> LDouble x
| _ -> invalid_arg "can't cast_int_to_double!"
let add _ vs =
mathop_reduce (+) (+.) (LInt 0) vs
let sub _ = function
| LCons (x, LNil) -> ((mathop_do_once (-) (-.)) (LInt 0, x))
| LCons (x, rest) -> mathop_reduce (-) (-.) x rest
| _ -> invalid_arg "invalid argument list passed to (-)"
let mul _ vs =
mathop_reduce ( * ) ( *. ) (LInt 1) vs
let div _ vs =
let div_one = mathop_do_once ( / ) ( /. ) in
match vs with
(* (/ x) is equal to 1 / x *)
| LCons (x, LNil) -> div_one (LDouble 1., cast_int_to_double x)
| LCons (x, LCons (y, LNil)) -> div_one (cast_int_to_double x, y)
| _ -> invalid_arg "invalid argument list passed to (/)"
let rem _ = function
| LCons (x, LCons (y, LNil)) ->
mathop_do_once (mod) (mod_float) (cast_int_to_double x, cast_int_to_double y)
| _ -> invalid_arg "invalid argument list passed to (rem)"
let car _ = function
| LCons (a, _) -> a
| _ -> invalid_arg "car: non-cons"
let cdr _ = function
| LCons (_, d) -> d
| _ -> invalid_arg "cdr: non-cons"
let cons _ a b = LCons (a, b)
let lisp_list _ vs = vs
(* builtin function that updates an existing binding *)
let lisp_set env sym v =
match sym with
| LSymbol s -> Env.update env s v; v
| _ -> invalid_arg ("cannot set non-symbol " ^ dbg_print_one sym)
let lambda env = function
| LCons (l, body) ->
LLambda (env, l, body)
| args -> invalid_arg ("invalid args to fn! " ^ (dbg_print_one args))
let defn env = function
| LCons (LSymbol s, LCons (l, body)) ->
let f = LFunction (s, env, l, body) in
Env.set_global env s f; f
| args -> invalid_arg ("cannot define function! " ^ (dbg_print_one args))
let lambda_macro env = function
| LCons (l, body) -> LUnnamedMacro (env, l, body)
| args -> invalid_arg ("invalid args to fn-macro! " ^ (dbg_print_one args))
let defmacro env = function
| LCons (LSymbol s, LCons (l, body)) ->
let f = LMacro (s, env, l, body) in
Env.set_global env s f; f
| args -> invalid_arg ("cannot define macro! " ^ (dbg_print_one args))
let lisp_not _ = function
| LCons (LNil, LNil) -> LSymbol "t"
| _ -> LNil;;
(* This only creates a *local* binding, contained to the body given. *)
let bind_local env = function
| LCons (LSymbol s, LCons (v, body)) ->
let e = Env.new_lexical env in
Env.set_local e s (Eval.eval_one env v);
Eval.eval_body e body
| _ -> invalid_arg "invalid argument to bind-local"
(* special form that creates a global binding *)
let lisp_define env = function
| LCons (LSymbol s, LCons (v, LNil)) ->
let evaluated = Eval.eval_one env v in
Env.set_global env s evaluated;
evaluated
| _ -> invalid_arg "invalid args to def"
let lisp_if env = function
| LCons (cond, LCons (if_true, LNil)) ->
(match Eval.eval_one env cond with
| LNil -> LNil
| _ -> Eval.eval_one env if_true)
| LCons (cond, LCons (if_true, LCons (if_false, LNil))) ->
(match Eval.eval_one env cond with
| LNil -> Eval.eval_one env if_false
| _ -> Eval.eval_one env if_true)
| _ -> invalid_arg "invalid argument list passed to if!"
open Env;;
let bf s f = s, LBuiltinFunction (s, f)
let bf1 s f =
let aux e = function
| LCons (v, LNil) -> f e v
| _ -> invalid_arg ("invalid argument to " ^ s)
in bf s aux
let bf2 s f =
let aux e = function
| LCons (v1, LCons (v2, LNil)) -> f e v1 v2
| _ -> invalid_arg ("invalid argument to " ^ s)
in bf s aux
let sp s f = s, LBuiltinSpecial (s, f)
let sp1 s f =
let aux e = function
| LCons (v, LNil) -> f e v
| _ -> invalid_arg ("invalid argument to " ^ s)
in sp s aux
let sp2 s f =
let aux e = function
| LCons (v1, LCons (v2, LNil)) -> f e v1 v2
| _ -> invalid_arg ("invalid argument to " ^ s)
in sp s aux
let add_builtins bs =
List.iter (fun (s, f) -> set_default s f) bs
(*
(def defn
(fn-macro (name lm . body)
(list 'def name (cons 'fn (cons lm body)))))
(def defmacro
(fn-macro (name lm . body)
(list 'def name (cons 'fn-macro (cons lm body)))))
*)
let init_script =
"
(defmacro setq (sym val)
(list 'set (list 'quote sym) val))
(defmacro letfn (sym fun . body)
(cons 'let-one (cons sym (cons '() (cons (list 'setq sym fun) body)))))
(defn mapcar (f l)
(if l))
(defn filter (f l)
(letfn helper
(fn (l acc)
(if (nil? l) acc (helper (cdr l) (if (f (car l)) (cons (car l) acc) acc))))
(helper l '())))
";;
let init_default_env () =
add_builtins [
bf "+" add; bf "-" sub;
bf "*" mul; bf "/" div;
bf1 "car" car;
bf1 "cdr" cdr;
bf2 "cons" cons;
bf "rem" rem;
bf2 "set" lisp_set;
bf "list" lisp_list;
bf "nil?" lisp_not;
bf "not" lisp_not;
sp "fn" lambda;
sp "defn" defn;
sp "fn-macro" lambda_macro;
sp "defmacro" defmacro;
sp "let-one" bind_local;
sp "def" lisp_define;
sp1 "quote" (fun _ x -> x);
sp "if" lisp_if;
];
(*let () = add_builtin "print" lisp_prin *)
(* I know this looks insane. please trust me.
Idea: maybe put this in a file instead of putting
literally the entire standard library in a constant string
*)
ignore (Eval.eval_all default_env (read_from_str init_script));
()

9
lib/parser/ast.ml Normal file
View File

@@ -0,0 +1,9 @@
type lisp_ast =
| LInt of int
| LDouble of float
| LSymbol of string
| LString of string
| LNil
| LCons of lisp_ast * lisp_ast

7
lib/parser/dune Normal file
View File

@@ -0,0 +1,7 @@
(library
(name parser)
(modules parser lex parse ast)
(package ollisp))
(menhir (modules parse))
(ocamllex lex)

35
lib/parser/lex.mll Normal file
View File

@@ -0,0 +1,35 @@
{
open Lexing
open Parse
exception SyntaxError of string
let strip_quotes s = String.sub s 1 (String.length s - 2);;
}
let digit = ['0'-'9']
let number_sign = '-' | '+'
let int = number_sign? digit+
let double = digit* '.' digit+ | digit+ '.' digit*
let white = [' ' '\t']+
let newline = '\r' | '\n' | "\r\n"
let sym_char = ['a'-'z' 'A'-'Z' '!' '\\' '+' '-' '*' '/' '_' '?']
let sym = sym_char sym_char*
let str = '"' [^'"']* '"'
rule read =
parse
| white { read lexbuf }
| newline { new_line lexbuf; read lexbuf}
| int { INT (int_of_string (Lexing.lexeme lexbuf))}
| double { DOUBLE (float_of_string (Lexing.lexeme lexbuf))}
| sym { SYM (Lexing.lexeme lexbuf)}
| str { STR (strip_quotes (Lexing.lexeme lexbuf))}
| '(' { LPAREN }
| ')' { RPAREN }
| '\'' { QUOTE }
| '.' { DOT }
| _ { raise (SyntaxError ("Unexpected char: " ^ Lexing.lexeme lexbuf))}
| eof { EOF }

36
lib/parser/parse.mly Normal file
View File

@@ -0,0 +1,36 @@
%{
open Ast
%}
%token <int> INT
%token <float> DOUBLE
%token <string> SYM
%token <string> STR
%token LPAREN
%token RPAREN
%token QUOTE
%token DOT
%token EOF
%start <lisp_ast option> prog
%%
prog:
| EOF { None }
| e = expr { Some e }
;
expr:
| i = INT { LInt i }
| d = DOUBLE { LDouble d}
| s = SYM { LSymbol s }
| s = STR { LString (String.uppercase_ascii s) }
| LPAREN; l = lisp_list_rest { l }
| QUOTE; e = expr { LCons (LSymbol "quote", LCons (e, LNil)) }
;
lisp_list_rest:
| RPAREN { LNil }
| DOT; e = expr; RPAREN { e }
| e = expr; lr = lisp_list_rest { LCons (e, lr) }
;

16
lib/parser/parser.ml Normal file
View File

@@ -0,0 +1,16 @@
let parse_one lb = Parse.prog (Lex.read) lb
let parse lb =
let rec helper () =
match parse_one lb with
| None -> []
| Some (t) -> t :: helper ()
in
helper ()
let parse_str s =
parse (Lexing.from_string s)
module Ast = Ast
module Parse = Parse