From c9584ccb26e36a6c1edadc9073555a44b74e73e5 Mon Sep 17 00:00:00 2001 From: Emin Arslan Date: Sun, 3 May 2026 16:27:52 +0300 Subject: [PATCH] feat: add small Forth implementation in C Co-authored-by: aider (openrouter/tencent/hy3-preview:free) --- forth.c | 384 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 384 insertions(+) create mode 100644 forth.c diff --git a/forth.c b/forth.c new file mode 100644 index 0000000..c0b77e4 --- /dev/null +++ b/forth.c @@ -0,0 +1,384 @@ +/* + * Small Forth Implementation in C + */ + +#include +#include +#include +#include +#include + +// Configuration +#define DATA_STACK_SIZE 256 +#define RET_STACK_SIZE 256 +#define DICT_SIZE 256 +#define BODY_SIZE 1024 +#define COMPILE_BUF_SIZE 1024 +#define INPUT_BUF_SIZE 256 +#define MAX_NAME_LEN 31 + +// Core types +typedef struct Word Word; +typedef union Cell { + Word* word; + int32_t num; +} Cell; + +struct Word { + Word* prev; + uint8_t flags; // Bit7=immediate, Bit6=hidden, Bits0-5=name length + char name[MAX_NAME_LEN + 1]; + void (*code)(Word*); + Cell* body; +}; + +// Globals +int32_t data_stack[DATA_STACK_SIZE]; +int sp = -1; +Cell* ret_stack[RET_STACK_SIZE]; +int rp = -1; +Cell* ip = NULL; + +Word dict[DICT_SIZE]; +int dict_idx = 0; +Word* dict_head = NULL; +Cell dict_bodies[BODY_SIZE]; +int body_idx = 0; + +int state = 0; // 0=interpret, 1=compile +Cell compile_buf[COMPILE_BUF_SIZE]; +int compile_idx = 0; +char compiling_name[MAX_NAME_LEN + 1]; + +char input_buf[INPUT_BUF_SIZE]; +char* input_ptr = NULL; + +// Stack helpers +void data_push(int32_t val) { + if (sp < DATA_STACK_SIZE - 1) { + data_stack[++sp] = val; + } else { + printf("Data stack overflow\n"); + } +} + +int32_t data_pop(void) { + if (sp >= 0) { + return data_stack[sp--]; + } else { + printf("Data stack underflow\n"); + return 0; + } +} + +void ret_push(Cell* val) { + if (rp < RET_STACK_SIZE - 1) { + ret_stack[++rp] = val; + } else { + printf("Return stack overflow\n"); + } +} + +Cell* ret_pop(void) { + if (rp >= 0) { + return ret_stack[rp--]; + } else { + printf("Return stack underflow\n"); + return NULL; + } +} + +// Dictionary helpers +Word* add_primitive(const char* name, void (*code)(Word*), uint8_t flags) { + if (dict_idx >= DICT_SIZE) { + printf("Dictionary full\n"); + return NULL; + } + Word* w = &dict[dict_idx++]; + w->prev = dict_head; + dict_head = w; + + size_t len = strlen(name); + if (len > MAX_NAME_LEN) len = MAX_NAME_LEN; + w->flags = flags | (uint8_t)len; + strncpy(w->name, name, len); + w->name[len] = '\0'; + w->code = code; + w->body = NULL; + return w; +} + +Word* lookup_word(const char* name) { + for (Word* w = dict_head; w != NULL; w = w->prev) { + if (w->flags & (1 << 6)) continue; // Skip hidden words + if (strcmp(w->name, name) == 0) return w; + } + return NULL; +} + +// Input tokenizer +char* next_token(void) { + if (input_ptr == NULL) return NULL; + while (*input_ptr != '\0' && isspace((unsigned char)*input_ptr)) { + input_ptr++; + } + if (*input_ptr == '\0') return NULL; + char* start = input_ptr; + while (*input_ptr != '\0' && !isspace((unsigned char)*input_ptr)) { + input_ptr++; + } + if (*input_ptr != '\0') { + *input_ptr = '\0'; + input_ptr++; + } + return start; +} + +// Primitive word implementations +void do_dup(Word* w) { + if (sp < 0) return; + int32_t v = data_stack[sp]; + data_push(v); +} + +void do_drop(Word* w) { + data_pop(); +} + +void do_swap(Word* w) { + if (sp < 1) return; + int32_t a = data_stack[sp-1]; + int32_t b = data_stack[sp]; + data_stack[sp-1] = b; + data_stack[sp] = a; +} + +void do_over(Word* w) { + if (sp < 1) return; + data_push(data_stack[sp-1]); +} + +void do_add(Word* w) { + if (sp < 1) return; + int32_t b = data_pop(); + int32_t a = data_pop(); + data_push(a + b); +} + +void do_sub(Word* w) { + if (sp < 1) return; + int32_t b = data_pop(); + int32_t a = data_pop(); + data_push(a - b); +} + +void do_mul(Word* w) { + if (sp < 1) return; + int32_t b = data_pop(); + int32_t a = data_pop(); + data_push(a * b); +} + +void do_div(Word* w) { + if (sp < 1) return; + int32_t b = data_pop(); + int32_t a = data_pop(); + if (b == 0) { + printf("Division by zero\n"); + data_push(a); + data_push(b); + return; + } + data_push(a / b); +} + +void do_dot(Word* w) { + if (sp < 0) return; + printf("%d ", data_pop()); + fflush(stdout); +} + +void do_cr(Word* w) { + printf("\n"); + fflush(stdout); +} + +void do_exit(Word* w) { + Cell* ret_addr = ret_pop(); + ip = ret_addr; +} + +void do_docolon(Word* w) { + // Push current ip (return address) onto return stack + ret_push(ip); + // Set ip to this word's body + ip = w->body; +} + +void do_lit(Word* w) { + // ip points to the number cell (inner interpreter already incremented past lit word) + data_push(ip->num); + ip++; // Move past number cell +} + +void do_colon(Word* w) { + char* name = next_token(); + if (name == NULL) { + printf("':' expects a name\n"); + return; + } + strncpy(compiling_name, name, MAX_NAME_LEN); + compiling_name[MAX_NAME_LEN] = '\0'; + state = 1; // Enter compile mode + compile_idx = 0; // Reset compile buffer +} + +void do_semicolon(Word* w) { + if (state != 1) { + printf("';' is only valid in compile mode\n"); + return; + } + Word* exit_w = lookup_word("exit"); + if (exit_w == NULL) { + printf("Fatal: exit word not found\n"); + return; + } + if (compile_idx >= COMPILE_BUF_SIZE) { + printf("Compile buffer overflow\n"); + return; + } + compile_buf[compile_idx++] = (Cell){.word = exit_w}; + + // Copy compiled body to dictionary body storage + if (body_idx + compile_idx > BODY_SIZE) { + printf("Dictionary body storage full\n"); + return; + } + memcpy(&dict_bodies[body_idx], compile_buf, compile_idx * sizeof(Cell)); + + // Create new word entry + if (dict_idx >= DICT_SIZE) { + printf("Dictionary full\n"); + return; + } + Word* new_w = &dict[dict_idx++]; + new_w->prev = dict_head; + dict_head = new_w; + + size_t len = strlen(compiling_name); + if (len > MAX_NAME_LEN) len = MAX_NAME_LEN; + new_w->flags = (uint8_t)len; // No hidden, no immediate + strncpy(new_w->name, compiling_name, len); + new_w->name[len] = '\0'; + new_w->code = do_docolon; + new_w->body = &dict_bodies[body_idx]; + + body_idx += compile_idx; + state = 0; // Back to interpret mode +} + +// Interpreter functions +void inner_interpreter(void) { + while (ip != NULL) { + Cell current = *ip; + ip++; // Move to next cell + current.word->code(current.word); + } +} + +void process_token(const char* token) { + Word* w = lookup_word(token); + if (w != NULL) { + if (state == 0) { // Interpret mode + if (w->code == do_docolon) { // Colon definition + ret_push(NULL); // Return address to stop interpreter + ip = w->body; + inner_interpreter(); + } else { // Primitive word + w->code(w); + } + } else { // Compile mode + if (w->flags & (1 << 7)) { // Immediate word: execute now + if (w->code == do_docolon) { + ret_push(NULL); + ip = w->body; + inner_interpreter(); + } else { + w->code(w); + } + } else { // Normal word: compile into current definition + if (compile_idx >= COMPILE_BUF_SIZE) { + printf("Compile buffer full\n"); + return; + } + compile_buf[compile_idx++] = (Cell){.word = w}; + } + } + } else { // Not a known word: try to parse as number + char* end; + long v = strtol(token, &end, 10); + if (end != token && *end == '\0') { // Valid integer + if (state == 0) { // Interpret mode: push number + data_push((int32_t)v); + } else { // Compile mode: compile lit + number + Word* lit_w = lookup_word("lit"); + if (lit_w == NULL) { + printf("Fatal: lit word not found\n"); + return; + } + if (compile_idx + 2 > COMPILE_BUF_SIZE) { + printf("Compile buffer full\n"); + return; + } + compile_buf[compile_idx++] = (Cell){.word = lit_w}; + compile_buf[compile_idx++] = (Cell){.num = (int32_t)v}; + } + } else { + printf("Unknown word: '%s'\n", token); + } + } +} + +void outer_interpreter(void) { + while (1) { + printf("ok "); + fflush(stdout); + if (fgets(input_buf, INPUT_BUF_SIZE, stdin) == NULL) { + break; // EOF + } + input_ptr = input_buf; + char* tok; + while ((tok = next_token()) != NULL) { + process_token(tok); + } + } + printf("\n"); +} + +int main(void) { + // Register primitive words + // Hidden words first + add_primitive("exit", do_exit, 0); + add_primitive("docolon", do_docolon, 1 << 6); // Hidden + add_primitive("lit", do_lit, 1 << 6); // Hidden + + // Public primitives + add_primitive("dup", do_dup, 0); + add_primitive("drop", do_drop, 0); + add_primitive("swap", do_swap, 0); + add_primitive("over", do_over, 0); + add_primitive("+", do_add, 0); + add_primitive("-", do_sub, 0); + add_primitive("*", do_mul, 0); + add_primitive("/", do_div, 0); + add_primitive(".", do_dot, 0); + add_primitive("cr", do_cr, 0); + + // Compilation words + add_primitive(":", do_colon, 0); + add_primitive(";", do_semicolon, 1 << 7); // Immediate word + + // Start outer interpreter + outer_interpreter(); + return 0; +}