diff --git a/Makefile b/Makefile index 5f52207..c241ba1 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CC = gcc -CFLAGS = -Wall -Wextra -g +CFLAGS = -Wall -Wextra -g -std=c11 -D_POSIX_C_SOURCE=200809L LDFLAGS = SRCS = forth_core.c forth_dict.c forth_words.c forth_interp.c main.c OBJS = $(SRCS:.c=.o) diff --git a/README.md b/README.md index 75f9691..8d7b573 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ This is a toy/subset Forth interpreter designed for educational purposes. It pro - **Threaded Code**: The interpreter uses an inner/outer interpreter model. Colon definitions are sequences of word addresses (threaded code) traversed by an instruction pointer. - **Architecture**: - - 32-bit signed integer cells (`int32_t`) + - 64-bit signed integer cells (`int64_t`) - Separate data and return stacks - - Fixed-size dictionary, user memory, and compile buffer + - Fixed-size user memory; dictionary, stacks, and compile buffer grow dynamically - Dictionary is a singly-linked list searched linearly @@ -23,8 +23,8 @@ This is a toy/subset Forth interpreter designed for educational purposes. 
It pro - Memory access (`@`, `!`, `C@`, `C!`, `HERE`, `ALLOT`) - Simple string output (`."`) - **Limitations**: - - Fixed memory limits (dictionary, stacks, user memory) - - 32-bit signed integers only; no floating-point support + - Fixed user memory limit (1M cells); dictionary and stacks grow dynamically + - 64-bit signed integers only; no floating-point support - No file I/O or operating system interface beyond stdin/stdout - No immediate user-defined words or advanced introspection - Single-threaded execution diff --git a/forth.h b/forth.h index b0ab712..56b6ab6 100644 --- a/forth.h +++ b/forth.h @@ -7,10 +7,14 @@ #include #include #include +#include // Configuration (all hard limits removed) #define MAX_NAME_LEN 31 +#define F_IMMEDIATE (1 << 7) +#define F_HIDDEN (1 << 6) + // Core types typedef struct Word Word; typedef union Cell { @@ -45,7 +49,8 @@ extern int32_t compile_idx; extern int32_t compile_cap; extern char compiling_name[MAX_NAME_LEN + 1]; -extern char input_buf[1024]; // fixed for line input +extern char* input_buf; // line buffer (dynamic, managed by getline) +extern size_t input_buf_cap; extern char* input_ptr; extern int64_t *compile_stack; // holds indices into compile_buf @@ -56,6 +61,14 @@ extern Cell *user_mem; extern int64_t user_mem_size; // in cells extern Cell* here; +// Pointers to critical hidden/primitive words (set during init) +extern Word* w_exit; +extern Word* w_docolon; +extern Word* w_lit; +extern Word* w_branch; +extern Word* w_zbranch; +extern Word* w_dot_quote_inner; + // Core function prototypes void data_push(int64_t val); int64_t data_pop(void); diff --git a/forth_core.c b/forth_core.c index 29f5fa1..a7adc58 100644 --- a/forth_core.c +++ b/forth_core.c @@ -18,7 +18,8 @@ int32_t compile_idx = 0; int32_t compile_cap = 0; char compiling_name[MAX_NAME_LEN + 1] = {0}; -char input_buf[1024] = {0}; +char* input_buf = NULL; +size_t input_buf_cap = 0; char* input_ptr = NULL; int64_t *compile_stack = NULL; @@ -29,6 +30,13 @@ Cell 
*user_mem = NULL; int64_t user_mem_size = 0; Cell* here = NULL; +Word* w_exit = NULL; +Word* w_docolon = NULL; +Word* w_lit = NULL; +Word* w_branch = NULL; +Word* w_zbranch = NULL; +Word* w_dot_quote_inner = NULL; + // ---------- Data stack ---------- void data_push(int64_t val) { if (data_sp + 1 >= data_cap) { diff --git a/forth_interp.c b/forth_interp.c index ee1ef63..980f0ec 100644 --- a/forth_interp.c +++ b/forth_interp.c @@ -73,13 +73,12 @@ void process_token(const char* token) { if (state == 0) { data_push((int64_t)v); } else { // Compile lit + number - Word* lit_w = lookup_word_internal("lit"); - if (!lit_w) { + if (!w_lit) { fprintf(stderr, "Fatal: lit word not found\n"); return; } ensure_compile_cap(2); - compile_buf[compile_idx++] = (Cell){.word = lit_w}; + compile_buf[compile_idx++] = (Cell){.word = w_lit}; compile_buf[compile_idx++] = (Cell){.num = (int64_t)v}; } } else { @@ -92,9 +91,13 @@ void outer_interpreter(void) { while (1) { printf("ok "); fflush(stdout); - if (fgets(input_buf, sizeof(input_buf), stdin) == NULL) { + ssize_t n = getline(&input_buf, &input_buf_cap, stdin); + if (n < 0) { break; } + if (n > 0 && input_buf[n - 1] == '\n') { + input_buf[n - 1] = '\0'; + } input_ptr = input_buf; char* tok; while ((tok = next_token()) != NULL) { diff --git a/forth_words.c b/forth_words.c index e3e878d..3b7058c 100644 --- a/forth_words.c +++ b/forth_words.c @@ -60,10 +60,11 @@ void do_nip(Word* w) { void do_tuck(Word* w) { (void)w; if (data_sp < 1) return; - int64_t a = data_stack[data_sp-1]; - int64_t b = data_stack[data_sp]; - data_push(a); - data_stack[data_sp-2] = b; + int64_t x1 = data_stack[data_sp-1]; + int64_t x2 = data_stack[data_sp]; + data_stack[data_sp-1] = x2; + data_stack[data_sp] = x1; + data_push(x2); } // Arithmetic @@ -97,7 +98,7 @@ void do_div(Word* w) { int64_t b = data_pop(); int64_t a = data_pop(); if (b == 0) { - printf("Division by zero\n"); + fprintf(stderr, "Division by zero\n"); data_push(a); data_push(b); return; @@ -111,7 
+112,7 @@ void do_mod(Word* w) { int64_t b = data_pop(); int64_t a = data_pop(); if (b == 0) { - printf("Modulo by zero\n"); + fprintf(stderr, "Modulo by zero\n"); data_push(a); data_push(b); return; @@ -125,7 +126,7 @@ void do_slash_mod(Word* w) { int64_t b = data_pop(); int64_t a = data_pop(); if (b == 0) { - printf("Modulo by zero\n"); + fprintf(stderr, "Modulo by zero\n"); data_push(a); data_push(b); return; @@ -335,30 +336,29 @@ void do_dot_quote(Word* w) { (void)w; if (state == 0) { // Interpret mode: print immediately - if (input_ptr == NULL) { printf("Missing string\n"); return; } + if (input_ptr == NULL) { fprintf(stderr, "Missing string\n"); return; } while (*input_ptr && isspace((unsigned char)*input_ptr)) input_ptr++; - if (*input_ptr != '"') { printf("Expected \" to start string\n"); return; } + if (*input_ptr != '"') { fprintf(stderr, "Expected \" to start string\n"); return; } input_ptr++; char* start = input_ptr; while (*input_ptr && *input_ptr != '"') input_ptr++; - if (*input_ptr != '"') { printf("Unterminated string\n"); return; } + if (*input_ptr != '"') { fprintf(stderr, "Unterminated string\n"); return; } while (start < input_ptr) putchar(*start++); input_ptr++; fflush(stdout); } else { // Compile mode: compile string for runtime - if (input_ptr == NULL) { printf("Missing string\n"); return; } + if (input_ptr == NULL) { fprintf(stderr, "Missing string\n"); return; } while (*input_ptr && isspace((unsigned char)*input_ptr)) input_ptr++; - if (*input_ptr != '"') { printf("Expected \" to start string\n"); return; } + if (*input_ptr != '"') { fprintf(stderr, "Expected \" to start string\n"); return; } input_ptr++; char* start = input_ptr; while (*input_ptr && *input_ptr != '"') input_ptr++; - if (*input_ptr != '"') { printf("Unterminated string\n"); return; } + if (*input_ptr != '"') { fprintf(stderr, "Unterminated string\n"); return; } size_t len = input_ptr - start; - Word* inner_w = lookup_word_internal("do_dot_quote_inner"); - if (!inner_w) { 
printf("Fatal: do_dot_quote_inner not found\n"); return; } + if (!w_dot_quote_inner) { fprintf(stderr, "Fatal: do_dot_quote_inner not found\n"); return; } ensure_compile_cap(2 + (int32_t)len); - compile_buf[compile_idx++] = (Cell){.word = inner_w}; + compile_buf[compile_idx++] = (Cell){.word = w_dot_quote_inner}; compile_buf[compile_idx++] = (Cell){.num = (int64_t)len}; for (size_t i = 0; i < len; i++) { compile_buf[compile_idx++] = (Cell){.num = (int64_t)start[i]}; @@ -394,7 +394,7 @@ void do_fetch(Word* w) { (void)w; int64_t addr = data_pop(); if (addr < 0 || addr >= user_mem_size) { - printf("Address out of bounds\n"); + fprintf(stderr, "Address out of bounds\n"); return; } data_push(user_mem[addr].num); @@ -405,7 +405,7 @@ void do_store(Word* w) { int64_t addr = data_pop(); int64_t val = data_pop(); if (addr < 0 || addr >= user_mem_size) { - printf("Address out of bounds\n"); + fprintf(stderr, "Address out of bounds\n"); return; } user_mem[addr].num = val; @@ -416,7 +416,7 @@ void do_plus_store(Word* w) { int64_t addr = data_pop(); int64_t val = data_pop(); if (addr < 0 || addr >= user_mem_size) { - printf("Address out of bounds\n"); + fprintf(stderr, "Address out of bounds\n"); return; } user_mem[addr].num += val; @@ -427,7 +427,7 @@ void do_cfetch(Word* w) { int64_t addr = data_pop(); // byte offset int64_t max_byte = user_mem_size * (int64_t)sizeof(Cell); if (addr < 0 || addr >= max_byte) { - printf("Address out of bounds\n"); + fprintf(stderr, "Address out of bounds\n"); return; } uint8_t* base = (uint8_t*)user_mem; @@ -440,7 +440,7 @@ void do_cstore(Word* w) { int64_t val = data_pop(); int64_t max_byte = user_mem_size * (int64_t)sizeof(Cell); if (addr < 0 || addr >= max_byte) { - printf("Address out of bounds\n"); + fprintf(stderr, "Address out of bounds\n"); return; } uint8_t* base = (uint8_t*)user_mem; @@ -457,7 +457,7 @@ void do_allot(Word* w) { (void)w; int64_t n = data_pop(); if (here + n > user_mem + user_mem_size) { - printf("User memory 
overflow\n"); + fprintf(stderr, "User memory overflow\n"); return; } here += n; @@ -467,11 +467,11 @@ void do_allot(Word* w) { void do_variable(Word* w) { (void)w; char* name = next_token(); - if (!name) { printf("VARIABLE expects a name\n"); return; } + if (!name) { fprintf(stderr, "VARIABLE expects a name\n"); return; } // allocate one cell in user memory for the variable's data if (here + 1 > user_mem + user_mem_size) { - printf("User memory overflow\n"); + fprintf(stderr, "User memory overflow\n"); return; } Cell* var_cell = here; // address of the data cell @@ -497,11 +497,11 @@ void do_constant(Word* w) { (void)w; int64_t val = data_pop(); char* name = next_token(); - if (!name) { printf("CONSTANT expects a name\n"); data_push(val); return; } + if (!name) { fprintf(stderr, "CONSTANT expects a name\n"); data_push(val); return; } // allocate a cell in user memory to hold the constant value if (here + 1 > user_mem + user_mem_size) { - printf("User memory overflow\n"); + fprintf(stderr, "User memory overflow\n"); data_push(val); // restore the value (optional) return; } @@ -547,7 +547,7 @@ void do_r_from(Word* w) { void do_r_fetch(Word* w) { (void)w; - if (rp < 0) { printf("Return stack underflow\n"); return; } + if (rp < 0) { fprintf(stderr, "Return stack underflow\n"); return; } data_push(ret_stack[rp].num); } @@ -572,7 +572,7 @@ void do_lit(Word* w) { void do_colon(Word* w) { (void)w; char* name = next_token(); - if (!name) { printf("':' expects a name\n"); return; } + if (!name) { fprintf(stderr, "':' expects a name\n"); return; } size_t len = strlen(name); if (len > MAX_NAME_LEN) len = MAX_NAME_LEN; memcpy(compiling_name, name, len); @@ -584,12 +584,11 @@ void do_colon(Word* w) { void do_semicolon(Word* w) { (void)w; - if (state != 1) { printf("';' only valid in compile mode\n"); return; } - Word* exit_w = lookup_word("exit"); - if (!exit_w) { printf("Fatal: exit word not found\n"); return; } + if (state != 1) { fprintf(stderr, "';' only valid in compile 
mode\n"); return; } + if (!w_exit) { fprintf(stderr, "Fatal: exit word not found\n"); return; } ensure_compile_cap(1); - compile_buf[compile_idx++] = (Cell){.word = exit_w}; + compile_buf[compile_idx++] = (Cell){.word = w_exit}; // Create body copy of compiled cells Cell* body_copy = malloc(compile_idx * sizeof(Cell)); @@ -633,11 +632,10 @@ void do_zero_branch(Word* w) { // Control flow using compile stack (indices) void do_if(Word* w) { (void)w; - if (state != 1) { printf("IF only valid in compile mode\n"); return; } - Word* zbranch = lookup_word_internal("0branch"); - if (!zbranch) { printf("Fatal: 0branch not found\n"); return; } + if (state != 1) { fprintf(stderr, "IF only valid in compile mode\n"); return; } + if (!w_zbranch) { fprintf(stderr, "Fatal: 0branch not found\n"); return; } ensure_compile_cap(2); - compile_buf[compile_idx++] = (Cell){.word = zbranch}; + compile_buf[compile_idx++] = (Cell){.word = w_zbranch}; // compile_push current index (where the offset will be placed) compile_push(compile_idx); compile_idx++; // reserve offset cell @@ -645,7 +643,7 @@ void do_if(Word* w) { void do_then(Word* w) { (void)w; - if (state != 1) { printf("THEN only valid in compile mode\n"); return; } + if (state != 1) { fprintf(stderr, "THEN only valid in compile mode\n"); return; } int64_t offset_idx = compile_pop(); if (offset_idx < 0) return; compile_buf[offset_idx].num = compile_idx - offset_idx; @@ -653,52 +651,49 @@ void do_then(Word* w) { void do_else(Word* w) { (void)w; - if (state != 1) { printf("ELSE only valid in compile mode\n"); return; } + if (state != 1) { fprintf(stderr, "ELSE only valid in compile mode\n"); return; } int64_t if_offset_idx = compile_pop(); if (if_offset_idx < 0) return; - Word* branch_w = lookup_word_internal("branch"); - if (!branch_w) { printf("Fatal: branch not found\n"); return; } + if (!w_branch) { fprintf(stderr, "Fatal: branch not found\n"); return; } ensure_compile_cap(2); // resolve IF offset to skip the ELSE branch 
compile_buf[if_offset_idx].num = (compile_idx + 2) - if_offset_idx; // compile unconditional branch for ELSE part - compile_buf[compile_idx++] = (Cell){.word = branch_w}; + compile_buf[compile_idx++] = (Cell){.word = w_branch}; compile_push(compile_idx); compile_idx++; // reserve offset cell } void do_begin(Word* w) { (void)w; - if (state != 1) { printf("BEGIN only valid in compile mode\n"); return; } + if (state != 1) { fprintf(stderr, "BEGIN only valid in compile mode\n"); return; } compile_push(compile_idx); } void do_until(Word* w) { (void)w; - if (state != 1) { printf("UNTIL only valid in compile mode\n"); return; } + if (state != 1) { fprintf(stderr, "UNTIL only valid in compile mode\n"); return; } int64_t begin_idx = compile_pop(); if (begin_idx < 0) return; - Word* zbranch = lookup_word_internal("0branch"); - if (!zbranch) { printf("Fatal: 0branch not found\n"); return; } + if (!w_zbranch) { fprintf(stderr, "Fatal: 0branch not found\n"); return; } ensure_compile_cap(2); - compile_buf[compile_idx++] = (Cell){.word = zbranch}; + compile_buf[compile_idx++] = (Cell){.word = w_zbranch}; compile_buf[compile_idx++] = (Cell){.num = begin_idx - compile_idx}; } void do_while(Word* w) { (void)w; - if (state != 1) { printf("WHILE only valid in compile mode\n"); return; } + if (state != 1) { fprintf(stderr, "WHILE only valid in compile mode\n"); return; } int64_t begin_idx = compile_pop(); if (begin_idx < 0) return; - Word* zbranch = lookup_word_internal("0branch"); - if (!zbranch) { printf("Fatal: 0branch not found\n"); return; } + if (!w_zbranch) { fprintf(stderr, "Fatal: 0branch not found\n"); return; } ensure_compile_cap(2); - compile_buf[compile_idx++] = (Cell){.word = zbranch}; + compile_buf[compile_idx++] = (Cell){.word = w_zbranch}; int64_t while_offset_idx = compile_idx; compile_idx++; // reserve offset @@ -708,16 +703,15 @@ void do_while(Word* w) { void do_repeat(Word* w) { (void)w; - if (state != 1) { printf("REPEAT only valid in compile mode\n"); return; } + 
if (state != 1) { fprintf(stderr, "REPEAT only valid in compile mode\n"); return; } int64_t begin_idx = compile_pop(); if (begin_idx < 0) return; int64_t while_offset_idx = compile_pop(); if (while_offset_idx < 0) return; - Word* branch_w = lookup_word_internal("branch"); - if (!branch_w) { printf("Fatal: branch not found\n"); return; } + if (!w_branch) { fprintf(stderr, "Fatal: branch not found\n"); return; } ensure_compile_cap(2); - compile_buf[compile_idx++] = (Cell){.word = branch_w}; + compile_buf[compile_idx++] = (Cell){.word = w_branch}; compile_buf[compile_idx++] = (Cell){.num = begin_idx - compile_idx}; compile_buf[while_offset_idx].num = compile_idx - while_offset_idx; @@ -755,10 +749,9 @@ void do_roll(Word* w) { void do_qdup(Word* w) { (void)w; if (data_sp < 0) return; - int64_t v = data_pop(); + int64_t v = data_stack[data_sp]; if (v != 0) { data_push(v); - data_push(v); } } @@ -780,12 +773,12 @@ void do_2drop(Word* w) { void do_2swap(Word* w) { (void)w; if (data_sp < 3) return; - int64_t d = data_pop(); // x4 - int64_t c = data_pop(); // x3 - int64_t b = data_pop(); // x2 - int64_t a = data_pop(); // x1 - data_push(b); - data_push(a); - data_push(d); - data_push(c); + int64_t x4 = data_pop(); + int64_t x3 = data_pop(); + int64_t x2 = data_pop(); + int64_t x1 = data_pop(); + data_push(x3); + data_push(x4); + data_push(x1); + data_push(x2); } diff --git a/main.c b/main.c index 3447169..13cfae6 100644 --- a/main.c +++ b/main.c @@ -11,12 +11,12 @@ int main(void) { here = user_mem; // Hidden words first - add_primitive("exit", do_exit, 0); - add_primitive("docolon", do_docolon, 1 << 6); - add_primitive("lit", do_lit, 1 << 6); - add_primitive("do_dot_quote_inner", do_dot_quote_inner, 1 << 6); - add_primitive("0branch", do_zero_branch, 1 << 6); - add_primitive("branch", do_branch, 1 << 6); + w_exit = add_primitive("exit", do_exit, 0); + w_docolon = add_primitive("docolon", do_docolon, F_HIDDEN); + w_lit = add_primitive("lit", do_lit, F_HIDDEN); + 
w_dot_quote_inner = add_primitive("do_dot_quote_inner", do_dot_quote_inner, F_HIDDEN); + w_zbranch = add_primitive("0branch", do_zero_branch, F_HIDDEN); + w_branch = add_primitive("branch", do_branch, F_HIDDEN); // Public primitives // Stack ops @@ -69,7 +69,7 @@ int main(void) { add_primitive("cr", do_cr, 0); add_primitive("emit", do_emit, 0); add_primitive("key", do_key, 0); - add_primitive(".\"", do_dot_quote, 1 << 7); // immediate + add_primitive(".\"", do_dot_quote, F_IMMEDIATE); // immediate add_primitive("words", do_words, 0); // Memory @@ -90,14 +90,14 @@ int main(void) { // Compilation / control flow add_primitive(":", do_colon, 0); - add_primitive(";", do_semicolon, 1 << 7); - add_primitive("if", do_if, 1 << 7); - add_primitive("else", do_else, 1 << 7); - add_primitive("then", do_then, 1 << 7); - add_primitive("begin", do_begin, 1 << 7); - add_primitive("until", do_until, 1 << 7); - add_primitive("while", do_while, 1 << 7); - add_primitive("repeat", do_repeat, 1 << 7); + add_primitive(";", do_semicolon, F_IMMEDIATE); + add_primitive("if", do_if, F_IMMEDIATE); + add_primitive("else", do_else, F_IMMEDIATE); + add_primitive("then", do_then, F_IMMEDIATE); + add_primitive("begin", do_begin, F_IMMEDIATE); + add_primitive("until", do_until, F_IMMEDIATE); + add_primitive("while", do_while, F_IMMEDIATE); + add_primitive("repeat", do_repeat, F_IMMEDIATE); /* Additional words */ add_primitive("depth", do_depth, 0);