From e5c6fdf9d0796cd642e3b86c12f0ac1133aede6e Mon Sep 17 00:00:00 2001
From: Emin Arslan
Date: Sun, 3 May 2026 16:59:53 +0300
Subject: [PATCH] fix: correct Cell union and add interpreter, main, word implementations

Co-authored-by: aider (openrouter/tencent/hy3-preview:free)
---
 forth.h        |   2 +-
 forth_interp.c | 112 +++++++++++++++++++++++++++
 forth_words.c  | 201 ++++++++++++++++++++++++++++++++++++++++++++++++-
 main.c         |  74 ++++++++++++++++++
 4 files changed, 387 insertions(+), 2 deletions(-)
 create mode 100644 forth_interp.c
 create mode 100644 main.c

diff --git a/forth.h b/forth.h
index 0069945..f5e6e37 100644
--- a/forth.h
+++ b/forth.h
@@ -24,7 +24,7 @@ typedef struct Word Word;
 typedef union Cell {
     Word* word;
     int32_t num;
-    Cell* cell_ptr;  // For storing ip (Cell*) in return stack
+    union Cell* cell_ptr;  // Use union tag here, as typedef Cell is not available yet
 } Cell;
 
 struct Word {
diff --git a/forth_interp.c b/forth_interp.c
new file mode 100644
index 0000000..27247a8
--- /dev/null
+++ b/forth_interp.c
@@ -0,0 +1,112 @@
+#include "forth.h"
+
+#include <errno.h>
+
+// Input tokenizer: returns the next whitespace-delimited token of the current
+// input line, NUL-terminated in place, or NULL when no token is left.
+char* next_token(void) {
+    if (input_ptr == NULL) return NULL;
+    while (*input_ptr != '\0' && isspace((unsigned char)*input_ptr)) {
+        input_ptr++;
+    }
+    if (*input_ptr == '\0') return NULL;
+    char* start = input_ptr;
+    while (*input_ptr != '\0' && !isspace((unsigned char)*input_ptr)) {
+        input_ptr++;
+    }
+    if (*input_ptr != '\0') {
+        *input_ptr = '\0';
+        input_ptr++;
+    }
+    return start;
+}
+
+// Interpreter functions
+
+// Runs threaded code starting at ip until some word sets ip to NULL.
+void inner_interpreter(void) {
+    while (ip != NULL) {
+        Cell current = *ip;
+        ip++;  // Move to next cell
+        current.word->code(current.word);
+    }
+}
+
+// Executes a word right away; colon definitions run to completion.
+static void execute_word(Word* w) {
+    if (w->code == do_docolon) {  // Colon definition
+        // NULL return address stops the inner interpreter. It is pushed with
+        // ret_push_ip so that it pairs with the ret_pop_ip done by exit.
+        ret_push_ip(NULL);
+        ip = w->body;
+        inner_interpreter();
+    } else {  // Primitive word
+        w->code(w);
+    }
+}
+
+// Interprets or compiles one token, depending on state.
+void process_token(const char* token) {
+    Word* w = lookup_word(token);
+    if (w != NULL) {
+        // Interpret mode runs every word; compile mode only immediate ones
+        if (state == 0 || (w->flags & (1 << 7))) {
+            execute_word(w);
+            return;
+        }
+        // Normal word: compile into current definition
+        if (compile_idx >= COMPILE_BUF_SIZE) {
+            printf("Compile buffer full\n");
+            return;
+        }
+        compile_buf[compile_idx++] = (Cell){.word = w};
+        return;
+    }
+
+    // Not a known word: try to parse as number
+    char* end;
+    errno = 0;
+    long v = strtol(token, &end, 10);
+    if (end == token || *end != '\0') {
+        printf("Unknown word: '%s'\n", token);
+        return;
+    }
+    // Cells hold int32_t: reject values the cast would silently truncate
+    if (errno == ERANGE || v < INT32_MIN || v > INT32_MAX) {
+        printf("Number out of range: '%s'\n", token);
+        return;
+    }
+    if (state == 0) {  // Interpret mode: push number
+        data_push((int32_t)v);
+        return;
+    }
+    // Compile mode: compile lit + number
+    Word* lit_w = lookup_word("lit");
+    if (lit_w == NULL) {
+        printf("Fatal: lit word not found\n");
+        return;
+    }
+    if (compile_idx + 2 > COMPILE_BUF_SIZE) {
+        printf("Compile buffer full\n");
+        return;
+    }
+    compile_buf[compile_idx++] = (Cell){.word = lit_w};
+    compile_buf[compile_idx++] = (Cell){.num = (int32_t)v};
+}
+
+// Read-eval loop: prompts, reads one line, processes its tokens; stops when
+// fgets returns NULL (EOF or read error).
+void outer_interpreter(void) {
+    while (1) {
+        printf("ok ");
+        fflush(stdout);
+        if (fgets(input_buf, INPUT_BUF_SIZE, stdin) == NULL) {
+            break;  // EOF
+        }
+        input_ptr = input_buf;
+        char* tok;
+        while ((tok = next_token()) != NULL) {
+            process_token(tok);
+        }
+    }
+    printf("\n");
+}
diff --git a/forth_words.c b/forth_words.c
index 9e70e28..b35c95c 100644
--- a/forth_words.c
+++ b/forth_words.c
@@ -293,4 +293,203 @@ void do_key(Word* w) {
 void do_dot_quote(Word* w) {
     // Immediate word: parse string until " and print/compile
     if (state == 0) {  // Interpret mode: print immediately
-        //
\ No newline at end of file
+        if (input_ptr == NULL) {
+            printf("Missing string\n");
+            return;
+        }
+        // The tokenizer already consumed the ." token and the one delimiter
+        // after it, so the text starts right at input_ptr.
+        char* start = input_ptr;
+        // Find closing "
+        while (*input_ptr != '\0' && *input_ptr != '"') {
+            input_ptr++;
+        }
+        if (*input_ptr != '"') {
+            printf("Unterminated string\n");
+            return;
+        }
+        // Print the string
+        while (start < input_ptr) {
+            putchar(*start++);
+        }
+        input_ptr++;  // Skip closing "
+        fflush(stdout);
+    } else {  // Compile mode: compile string for runtime
+        if (input_ptr == NULL) {
+            printf("Missing string\n");
+            return;
+        }
+        // As in interpret mode, the text starts right at input_ptr
+        char* start = input_ptr;
+        // Find closing "
+        while (*input_ptr != '\0' && *input_ptr != '"') {
+            input_ptr++;
+        }
+        if (*input_ptr != '"') {
+            printf("Unterminated string\n");
+            return;
+        }
+        size_t len = (size_t)(input_ptr - start);
+        input_ptr++;  // Skip closing " so errors below do not re-read it
+        // Compile do_dot_quote_inner
+        Word* inner_w = lookup_word("do_dot_quote_inner");
+        if (inner_w == NULL) {
+            printf("Fatal: do_dot_quote_inner not found\n");
+            return;
+        }
+        if (compile_idx + 2 + len > COMPILE_BUF_SIZE) {
+            printf("Compile buffer full\n");
+            return;
+        }
+        compile_buf[compile_idx++] = (Cell){.word = inner_w};
+        compile_buf[compile_idx++] = (Cell){.num = (int32_t)len};
+        // Store string characters in compile buffer (each as a num cell)
+        for (size_t i = 0; i < len; i++) {
+            compile_buf[compile_idx++] = (Cell){.num = (int32_t)start[i]};
+        }
+    }
+}
+
+void do_dot_quote_inner(Word* w) {
+    // Runtime: ip points to length cell, followed by string characters
+    int32_t len = ip->num;
+    ip++;
+    for (int32_t i = 0; i < len; i++) {
+        putchar((char)ip->num);
+        ip++;
+    }
+    fflush(stdout);
+}
+
+void do_words(Word* w) {
+    printf("Dictionary words:\n");
+    for (Word* cur = dict_head; cur != NULL; cur = cur->prev) {
+        if (cur->flags & (1 << 6)) continue;  // Skip hidden
+        printf("%s ", cur->name);
+    }
+    printf("\n");
+    fflush(stdout);
+}
+
+// Control flow operations
+void do_exit(Word* w) {
+    Cell* ret_addr = ret_pop_ip();
+    ip = ret_addr;
+}
+
+void do_docolon(Word* w) {
+    // Push current ip (return address) onto return stack
+    ret_push_ip(ip);
+    // Set ip to this word's body
+    ip = w->body;
+}
+
+void do_lit(Word* w) {
+    // ip points to the number cell (inner interpreter already incremented past lit word)
+    data_push(ip->num);
+    ip++;  // Move past number cell
+}
+
+void do_colon(Word* w) {
+    char* name = next_token();
+    if (name == NULL) {
+        printf("':' expects a name\n");
+        return;
+    }
+    strncpy(compiling_name, name, MAX_NAME_LEN);
+    compiling_name[MAX_NAME_LEN] = '\0';
+    state = 1;  // Enter compile mode
+    compile_idx = 0;  // Reset compile buffer
+}
+
+void do_semicolon(Word* w) {
+    if (state != 1) {
+        printf("';' is only valid in compile mode\n");
+        return;
+    }
+    // The definition now either completes or is abandoned; leave compile
+    // mode first so a failed ';' cannot strand the interpreter in it.
+    state = 0;
+
+    Word* exit_w = lookup_word("exit");
+    if (exit_w == NULL) {
+        printf("Fatal: exit word not found\n");
+        return;
+    }
+    // Check every capacity limit before modifying any storage
+    if (compile_idx >= COMPILE_BUF_SIZE) {
+        printf("Compile buffer overflow\n");
+        return;
+    }
+    if (body_idx + compile_idx + 1 > BODY_SIZE) {
+        printf("Dictionary body storage full\n");
+        return;
+    }
+    if (dict_idx >= DICT_SIZE) {
+        printf("Dictionary full\n");
+        return;
+    }
+    compile_buf[compile_idx++] = (Cell){.word = exit_w};
+
+    // Copy compiled body to dictionary body storage
+    memcpy(&dict_bodies[body_idx], compile_buf, compile_idx * sizeof(Cell));
+
+    // Create new word entry
+    Word* new_w = &dict[dict_idx++];
+    new_w->prev = dict_head;
+    dict_head = new_w;
+
+    size_t len = strlen(compiling_name);
+    if (len > MAX_NAME_LEN) len = MAX_NAME_LEN;
+    new_w->flags = (uint8_t)len;  // No hidden, no immediate
+    strncpy(new_w->name, compiling_name, len);
+    new_w->name[len] = '\0';
+    new_w->code = do_docolon;
+    new_w->body = &dict_bodies[body_idx];
+
+    body_idx += compile_idx;
+}
+
+void do_branch(Word* w) {
+    // Unconditional branch: ip points to offset cell
+    int32_t offset = ip->num;
+    ip += offset;  // Jump offset cells, counted from the offset cell itself
+}
+
+void do_zero_branch(Word* w) {
+    // Conditional branch: if top of stack is 0, branch
+    int32_t cond = data_pop();
+    if (cond == 0) {
+        int32_t offset = ip->num;
+        ip += offset;
+    } else {
+        ip++;  // Skip offset cell
+    }
+}
+
+// Memory operations (stubs for now)
+void do_fetch(Word* w) { /* TODO */ }
+void do_store(Word* w) { /* TODO */ }
+void do_plus_store(Word* w) { /* TODO */ }
+void do_cfetch(Word* w) { /* TODO */ }
+void do_cstore(Word* w) { /* TODO */ }
+void do_variable(Word* w) { /* TODO */ }
+void do_constant(Word* w) { /* TODO */ }
+void do_do_var(Word* w) { /* TODO */ }
+void do_do_const(Word* w) { /* TODO */ }
+void do_here(Word* w) { /* TODO */ }
+void do_allot(Word* w) { /* TODO */ }
+
+// Return stack operations (stubs for now)
+void do_to_r(Word* w) { /* TODO */ }
+void do_r_from(Word* w) { /* TODO */ }
+void do_r_fetch(Word* w) { /* TODO */ }
+
+// Additional control flow stubs
+void do_if(Word* w) { /* TODO */ }
+void do_else(Word* w) { /* TODO */ }
+void do_then(Word* w) { /* TODO */ }
+void do_begin(Word* w) { /* TODO */ }
+void do_until(Word* w) { /* TODO */ }
+void do_while(Word* w) { /* TODO */ }
+void do_repeat(Word* w) { /* TODO */ }
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..4410946
--- /dev/null
+++ b/main.c
@@ -0,0 +1,74 @@
+#include "forth.h"
+
+int main(void) {
+    // Register primitive words
+    // Runtime support words first (hidden, except exit)
+    add_primitive("exit", do_exit, 0);
+    add_primitive("docolon", do_docolon, 1 << 6);  // Hidden
+    add_primitive("lit", do_lit, 1 << 6);  // Hidden
+    add_primitive("do_dot_quote_inner", do_dot_quote_inner, 1 << 6);  // Hidden
+    add_primitive("0branch", do_zero_branch, 1 << 6);  // Hidden (for IF)
+    add_primitive("branch", do_branch, 1 << 6);  // Hidden (for ELSE, BEGIN)
+
+    // Public primitives
+    // Stack ops
+    add_primitive("dup", do_dup, 0);
+    add_primitive("drop", do_drop, 0);
+    add_primitive("swap", do_swap, 0);
+    add_primitive("over", do_over, 0);
+    add_primitive("rot", do_rot, 0);
+    add_primitive("minus-rot", do_minus_rot, 0);
+    add_primitive("nip", do_nip, 0);
+    add_primitive("tuck", do_tuck, 0);
+
+    // Arithmetic
+    add_primitive("+", do_add, 0);
+    add_primitive("-", do_sub, 0);
+    add_primitive("*", do_mul, 0);
+    add_primitive("/", do_div, 0);
+    add_primitive("mod", do_mod, 0);
+    add_primitive("/mod", do_slash_mod, 0);
+    add_primitive("1+", do_one_plus, 0);
+    add_primitive("1-", do_one_minus, 0);
+    add_primitive("2+", do_two_plus, 0);
+    add_primitive("2-", do_two_minus, 0);
+    add_primitive("negate", do_negate, 0);
+    add_primitive("abs", do_abs, 0);
+    add_primitive("min", do_min, 0);
+    add_primitive("max", do_max, 0);
+
+    // Logic
+    add_primitive("and", do_and, 0);
+    add_primitive("or", do_or, 0);
+    add_primitive("xor", do_xor, 0);
+    add_primitive("invert", do_invert, 0);
+    add_primitive("lshift", do_lshift, 0);
+    add_primitive("rshift", do_rshift, 0);
+
+    // Comparison
+    add_primitive("=", do_eq, 0);
+    add_primitive("<>", do_neq, 0);
+    add_primitive("<", do_lt, 0);
+    add_primitive(">", do_gt, 0);
+    add_primitive("<=", do_lte, 0);
+    add_primitive(">=", do_gte, 0);
+    add_primitive("0=", do_zero_eq, 0);
+    add_primitive("0<", do_zero_lt, 0);
+    add_primitive("0>", do_zero_gt, 0);
+
+    // I/O
+    add_primitive(".", do_dot, 0);
+    add_primitive("cr", do_cr, 0);
+    add_primitive("emit", do_emit, 0);
+    add_primitive("key", do_key, 0);
+    add_primitive(".\"", do_dot_quote, 1 << 7);  // Immediate
+    add_primitive("words", do_words, 0);
+
+    // Compilation words
+    add_primitive(":", do_colon, 0);
+    add_primitive(";", do_semicolon, 1 << 7);  // Immediate
+
+    // Start outer interpreter
+    outer_interpreter();
+    return 0;
+}