feat: add small Forth implementation in C

Co-authored-by: aider (openrouter/tencent/hy3-preview:free) <aider@aider.chat>
2026-05-03 16:27:52 +03:00
commit c9584ccb26
1 changed files with 384 additions and 0 deletions
@@ -0,0 +1,384 @@
+/*
+ * Small Forth Implementation in C
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Configuration: compile-time capacities of the interpreter's fixed-size
+ * storage. Every array below is statically allocated with these sizes.
+ */
+#define DATA_STACK_SIZE 256 // number of int32_t slots in data_stack
+#define RET_STACK_SIZE 256 // number of Cell* slots in ret_stack
+#define DICT_SIZE 256 // maximum number of Word entries in dict
+#define BODY_SIZE 1024 // total cells available for compiled word bodies (dict_bodies)
+#define COMPILE_BUF_SIZE 1024 // cells in compile_buf, the scratch area for the definition in progress
+#define INPUT_BUF_SIZE 256 // bytes in input_buf, one line read by fgets
+#define MAX_NAME_LEN 31 // longest stored word name, excluding the terminating NUL
+
+/* Core types.
+ * A Cell is one slot of a compiled word body: either a pointer to the word
+ * to execute or an inline integer (the operand that follows `lit`).
+ * A Word is one dictionary entry; entries are chained newest-to-oldest
+ * through `prev`.
+ */
+typedef struct Word Word;
+typedef union Cell {
+    Word* word; // reference to a dictionary entry to execute
+    int32_t num; // literal value, read by do_lit
+} Cell;
+
+struct Word {
+    Word* prev; // previously defined word, NULL for the first entry
+    uint8_t flags; // Bit7=immediate, Bit6=hidden, Bits0-5=name length
+    char name[MAX_NAME_LEN + 1]; // NUL-terminated name, truncated to MAX_NAME_LEN
+    void (*code)(Word*); // handler invoked with this entry as its argument
+    Cell* body; // compiled cells for colon definitions, NULL for primitives
+};
+
+/* Globals: the complete interpreter state. Everything is statically
+ * allocated and shared by all functions in this file.
+ */
+int32_t data_stack[DATA_STACK_SIZE]; // parameter stack
+int sp = -1; // index of the top data_stack element, -1 when empty
+Cell* ret_stack[RET_STACK_SIZE]; // saved instruction pointers of calling words
+int rp = -1; // index of the top ret_stack element, -1 when empty
+Cell* ip = NULL; // next cell to execute; NULL makes inner_interpreter stop
+
+Word dict[DICT_SIZE]; // backing storage for dictionary entries
+int dict_idx = 0; // next unused slot in dict
+Word* dict_head = NULL; // most recently added word, start of the prev chain
+Cell dict_bodies[BODY_SIZE]; // backing storage for compiled word bodies
+int body_idx = 0; // next unused cell in dict_bodies
+
+int state = 0; // 0=interpret, 1=compile
+Cell compile_buf[COMPILE_BUF_SIZE]; // cells of the definition being compiled
+int compile_idx = 0; // number of cells currently in compile_buf
+char compiling_name[MAX_NAME_LEN + 1]; // name given to ':' for the definition in progress
+
+char input_buf[INPUT_BUF_SIZE]; // current input line; next_token writes NULs into it
+char* input_ptr = NULL; // read cursor into input_buf used by next_token
+
+// Stack helpers
+// Push val onto the data stack. When the stack is already full the value is
+// dropped and "Data stack overflow" is printed.
+void data_push(int32_t val) {
+    if (sp >= DATA_STACK_SIZE - 1) {
+        printf("Data stack overflow\n");
+        return;
+    }
+    sp += 1;
+    data_stack[sp] = val;
+}
+
+// Remove and return the top of the data stack. On an empty stack prints
+// "Data stack underflow" and returns 0 without touching sp.
+int32_t data_pop(void) {
+    if (sp < 0) {
+        printf("Data stack underflow\n");
+        return 0;
+    }
+    int32_t top = data_stack[sp];
+    sp -= 1;
+    return top;
+}
+
+// Push a saved instruction pointer onto the return stack. When the stack is
+// full the value is dropped and "Return stack overflow" is printed.
+void ret_push(Cell* val) {
+    if (rp >= RET_STACK_SIZE - 1) {
+        printf("Return stack overflow\n");
+        return;
+    }
+    rp += 1;
+    ret_stack[rp] = val;
+}
+
+// Remove and return the top of the return stack. On an empty stack prints
+// "Return stack underflow" and returns NULL.
+Cell* ret_pop(void) {
+    if (rp < 0) {
+        printf("Return stack underflow\n");
+        return NULL;
+    }
+    Cell* saved = ret_stack[rp];
+    rp -= 1;
+    return saved;
+}
+
+// Dictionary helpers
+// Append a primitive (body-less) word to the dictionary and make it the new
+// dictionary head.
+//   name  - word name; stored truncated to MAX_NAME_LEN characters
+//   code  - handler called when the word executes
+//   flags - high flag bits (bit 7 immediate, bit 6 hidden); the stored name
+//           length is OR-ed into the low bits
+// Returns the new entry, or NULL (after printing "Dictionary full") when no
+// slot is left.
+Word* add_primitive(const char* name, void (*code)(Word*), uint8_t flags) {
+    if (dict_idx >= DICT_SIZE) {
+        printf("Dictionary full\n");
+        return NULL;
+    }
+
+    size_t name_len = strlen(name);
+    if (name_len > MAX_NAME_LEN) {
+        name_len = MAX_NAME_LEN;
+    }
+
+    Word* entry = &dict[dict_idx];
+    dict_idx += 1;
+
+    // name_len never exceeds strlen(name), so a plain byte copy is enough.
+    memcpy(entry->name, name, name_len);
+    entry->name[name_len] = '\0';
+    entry->flags = flags | (uint8_t)name_len;
+    entry->code = code;
+    entry->body = NULL;
+
+    // Link in front of the existing chain.
+    entry->prev = dict_head;
+    dict_head = entry;
+    return entry;
+}
+
+// Find the newest visible word called `name` (exact, case-sensitive match).
+// Entries with the hidden flag (bit 6) are never returned.
+// Returns NULL when no visible word matches.
+Word* lookup_word(const char* name) {
+    Word* cur = dict_head;
+    while (cur != NULL) {
+        int hidden = (cur->flags & (1 << 6)) != 0;
+        if (!hidden && strcmp(cur->name, name) == 0) {
+            return cur;
+        }
+        cur = cur->prev;
+    }
+    return NULL;
+}
+
+// Input tokenizer
+// Return the next whitespace-delimited token of the current input line, or
+// NULL when the line is exhausted (or no line has been loaded yet).
+// The token is terminated in place: the delimiter following it is overwritten
+// with NUL, so the returned pointer aliases input storage.
+char* next_token(void) {
+    if (input_ptr == NULL) return NULL;
+
+    char* cursor = input_ptr;
+
+    // Skip leading whitespace.
+    while (*cursor != '\0' && isspace((unsigned char)*cursor)) {
+        cursor++;
+    }
+    if (*cursor == '\0') {
+        input_ptr = cursor;
+        return NULL;
+    }
+
+    // Scan to the end of the token.
+    char* token = cursor;
+    while (*cursor != '\0' && !isspace((unsigned char)*cursor)) {
+        cursor++;
+    }
+
+    // Cut the token off and step past the delimiter, unless the token ran
+    // up to the end of the line.
+    if (*cursor != '\0') {
+        *cursor = '\0';
+        cursor++;
+    }
+    input_ptr = cursor;
+    return token;
+}
+
+// Primitive word implementations
+// dup ( a -- a a ): duplicate the top of the data stack.
+// Does nothing, silently, when the stack is empty.
+void do_dup(Word* w) {
+    (void)w;
+    if (sp >= 0) {
+        data_push(data_stack[sp]);
+    }
+}
+
+// drop ( a -- ): discard the top of the data stack. An empty stack is
+// reported by data_pop.
+void do_drop(Word* w) {
+    (void)w;
+    (void)data_pop();
+}
+
+// swap ( a b -- b a ): exchange the two topmost data stack items in place.
+// Does nothing, silently, when fewer than two items are present.
+void do_swap(Word* w) {
+    (void)w;
+    if (sp < 1) return;
+    int32_t* top = &data_stack[sp];
+    int32_t tmp = top[0];
+    top[0] = top[-1];
+    top[-1] = tmp;
+}
+
+// over ( a b -- a b a ): push a copy of the second item from the top.
+// Does nothing, silently, when fewer than two items are present.
+void do_over(Word* w) {
+    (void)w;
+    if (sp >= 1) {
+        int32_t second = data_stack[sp - 1];
+        data_push(second);
+    }
+}
+
+// + ( a b -- a+b ): add the two topmost data stack items.
+// Does nothing, silently, when fewer than two items are present.
+void do_add(Word* w) {
+    (void)w;
+    if (sp < 1) return;
+    int32_t b = data_pop();
+    int32_t a = data_pop();
+    // Signed overflow is undefined behaviour in C, so do the arithmetic on
+    // unsigned values (which wrap modulo 2^32) and convert back.
+    data_push((int32_t)((uint32_t)a + (uint32_t)b));
+}
+
+// - ( a b -- a-b ): subtract the top item from the one below it.
+// Does nothing, silently, when fewer than two items are present.
+void do_sub(Word* w) {
+    (void)w;
+    if (sp < 1) return;
+    int32_t b = data_pop();
+    int32_t a = data_pop();
+    // Signed overflow is undefined behaviour in C, so do the arithmetic on
+    // unsigned values (which wrap modulo 2^32) and convert back.
+    data_push((int32_t)((uint32_t)a - (uint32_t)b));
+}
+
+// * ( a b -- a*b ): multiply the two topmost data stack items.
+// Does nothing, silently, when fewer than two items are present.
+void do_mul(Word* w) {
+    (void)w;
+    if (sp < 1) return;
+    int32_t b = data_pop();
+    int32_t a = data_pop();
+    // Signed overflow is undefined behaviour in C. The low 32 bits of a
+    // product are the same for signed and unsigned operands, so multiply as
+    // unsigned (wrapping modulo 2^32) and convert back.
+    data_push((int32_t)((uint32_t)a * (uint32_t)b));
+}
+
+// / ( a b -- a/b ): integer division, truncating toward zero as C does.
+// Does nothing, silently, when fewer than two items are present.
+// On a zero divisor prints "Division by zero" and restores both operands.
+void do_div(Word* w) {
+    (void)w;
+    if (sp < 1) return;
+    int32_t b = data_pop();
+    int32_t a = data_pop();
+    if (b == 0) {
+        printf("Division by zero\n");
+        data_push(a);
+        data_push(b);
+        return;
+    }
+    // INT32_MIN / -1 is not representable; evaluating it is undefined
+    // behaviour and traps on common hardware. Produce the wrapped result,
+    // which is INT32_MIN again.
+    if (a == INT32_MIN && b == -1) {
+        data_push(INT32_MIN);
+        return;
+    }
+    data_push(a / b);
+}
+
+// . ( a -- ): pop the top of the data stack and print it in decimal,
+// followed by one space. Does nothing, silently, when the stack is empty.
+void do_dot(Word* w) {
+    (void)w;
+    if (sp < 0) return;
+    // PRId32 matches int32_t on every platform; plain %d only does so where
+    // int32_t happens to be int.
+    printf("%" PRId32 " ", data_pop());
+    fflush(stdout);
+}
+
+// cr ( -- ): emit a newline and flush standard output.
+void do_cr(Word* w) {
+    (void)w;
+    putchar('\n');
+    fflush(stdout);
+}
+
+// exit: return from the current colon definition by restoring ip from the
+// return stack. A NULL saved address makes inner_interpreter stop.
+void do_exit(Word* w) {
+    (void)w;
+    ip = ret_pop();
+}
+
+// Code handler of every colon definition: remember where the caller should
+// resume, then continue execution at the first cell of w's body.
+void do_docolon(Word* w) {
+    Cell* resume_at = ip;
+    ret_push(resume_at);
+    ip = w->body;
+}
+
+// lit: push the inline number stored in the cell after the `lit` reference.
+// inner_interpreter has already advanced ip past `lit`, so ip addresses the
+// operand cell on entry; it is stepped over so it is not executed as a word.
+void do_lit(Word* w) {
+    (void)w;
+    Cell* operand = ip;
+    ip = operand + 1;
+    data_push(operand->num);
+}
+
+// : <name> - begin a new colon definition. Reads the next input token as the
+// word's name (kept truncated to MAX_NAME_LEN), empties the compile buffer
+// and switches to compile mode. Prints an error and changes nothing when no
+// name follows.
+void do_colon(Word* w) {
+    (void)w;
+    char* name = next_token();
+    if (name != NULL) {
+        strncpy(compiling_name, name, MAX_NAME_LEN);
+        compiling_name[MAX_NAME_LEN] = '\0';
+        compile_idx = 0; // start with an empty body
+        state = 1; // compile mode
+    } else {
+        printf("':' expects a name\n");
+    }
+}
+
+// ; - finish the colon definition in progress (immediate word).
+// Appends a reference to the `exit` primitive to the compiled cells, copies
+// them into permanent body storage and links a new dictionary entry named by
+// the preceding ':'.
+// On any failure a message is printed, the definition is discarded and the
+// interpreter returns to interpret mode, so a failed definition cannot leave
+// the REPL stuck in compile mode.
+void do_semicolon(Word* w) {
+    (void)w;
+    if (state != 1) {
+        printf("';' is only valid in compile mode\n");
+        return;
+    }
+
+    // The definition ends here whether or not it can be stored.
+    state = 0;
+
+    // Locate the terminator by its handler rather than by name, so a user
+    // word that happens to be called "exit" cannot be compiled in its place.
+    Word* exit_w = NULL;
+    for (Word* e = dict_head; e != NULL; e = e->prev) {
+        if (e->code == do_exit) {
+            exit_w = e;
+            break;
+        }
+    }
+    if (exit_w == NULL) {
+        printf("Fatal: exit word not found\n");
+        return;
+    }
+    if (compile_idx >= COMPILE_BUF_SIZE) {
+        printf("Compile buffer overflow\n");
+        return;
+    }
+    compile_buf[compile_idx++] = (Cell){.word = exit_w};
+
+    // Check both capacities before anything permanent is modified.
+    if (body_idx + compile_idx > BODY_SIZE) {
+        printf("Dictionary body storage full\n");
+        return;
+    }
+    if (dict_idx >= DICT_SIZE) {
+        printf("Dictionary full\n");
+        return;
+    }
+
+    // Copy compiled body to dictionary body storage
+    Cell* body = &dict_bodies[body_idx];
+    memcpy(body, compile_buf, (size_t)compile_idx * sizeof(Cell));
+    body_idx += compile_idx;
+
+    // Create new word entry
+    Word* new_w = &dict[dict_idx++];
+    size_t len = strlen(compiling_name);
+    if (len > MAX_NAME_LEN) len = MAX_NAME_LEN;
+    memcpy(new_w->name, compiling_name, len);
+    new_w->name[len] = '\0';
+    new_w->flags = (uint8_t)len; // No hidden, no immediate
+    new_w->code = do_docolon;
+    new_w->body = body;
+
+    // Publish the word only once it is fully initialised.
+    new_w->prev = dict_head;
+    dict_head = new_w;
+}
+
+// Interpreter functions
+// Threaded-code loop: run cells starting at ip until ip becomes NULL.
+// ip is advanced before the handler runs, so handlers see ip pointing at the
+// cell that follows the one being executed and may redirect it.
+void inner_interpreter(void) {
+    while (ip != NULL) {
+        Word* target = ip->word;
+        ip += 1;
+        target->code(target);
+    }
+}
+
+// Execute a dictionary word right now. Colon definitions are run through
+// inner_interpreter with a NULL return address so the loop stops when the
+// word's final exit is reached; primitives are called directly.
+static void execute_word(Word* w) {
+    if (w->code == do_docolon) {
+        ret_push(NULL); // Return address to stop interpreter
+        ip = w->body;
+        inner_interpreter();
+    } else {
+        w->code(w);
+    }
+}
+
+// Find the newest dictionary entry whose handler is `code`, hidden entries
+// included. Used for internal words that lookup_word deliberately skips.
+static Word* find_word_by_code(void (*code)(Word*)) {
+    for (Word* w = dict_head; w != NULL; w = w->prev) {
+        if (w->code == code) return w;
+    }
+    return NULL;
+}
+
+// Handle one input token.
+//  - Known word, interpret mode: execute it.
+//  - Known word, compile mode: execute it if immediate (bit 7), otherwise
+//    append a reference to it to the definition being compiled.
+//  - Otherwise the token must be a decimal integer that fits in 32 bits: it
+//    is pushed (interpret mode) or compiled as `lit` followed by the value
+//    (compile mode).
+// Anything else is reported on standard output and ignored.
+void process_token(const char* token) {
+    Word* w = lookup_word(token);
+    if (w != NULL) {
+        if (state == 0 || (w->flags & (1 << 7))) {
+            execute_word(w);
+            return;
+        }
+        // Normal word: compile into current definition
+        if (compile_idx >= COMPILE_BUF_SIZE) {
+            printf("Compile buffer full\n");
+            return;
+        }
+        compile_buf[compile_idx++] = (Cell){.word = w};
+        return;
+    }
+
+    // Not a known word: try to parse as number
+    char* end;
+    errno = 0;
+    long v = strtol(token, &end, 10);
+    if (end == token || *end != '\0') {
+        printf("Unknown word: '%s'\n", token);
+        return;
+    }
+    // Reject values that would be silently truncated by the int32_t cells.
+    if (errno == ERANGE || v < INT32_MIN || v > INT32_MAX) {
+        printf("Number out of range: '%s'\n", token);
+        return;
+    }
+
+    if (state == 0) { // Interpret mode: push number
+        data_push((int32_t)v);
+        return;
+    }
+
+    // Compile mode: compile lit + number. `lit` is registered as a hidden
+    // word, so lookup_word can never return it; find it by its handler.
+    Word* lit_w = find_word_by_code(do_lit);
+    if (lit_w == NULL) {
+        printf("Fatal: lit word not found\n");
+        return;
+    }
+    if (compile_idx + 2 > COMPILE_BUF_SIZE) {
+        printf("Compile buffer full\n");
+        return;
+    }
+    compile_buf[compile_idx++] = (Cell){.word = lit_w};
+    compile_buf[compile_idx++] = (Cell){.num = (int32_t)v};
+}
+
+// Read-eval-print loop: print the "ok " prompt, read one line from standard
+// input, and feed each of its tokens to process_token. Ends at end of input
+// (or read error), then prints a final newline.
+void outer_interpreter(void) {
+    for (;;) {
+        printf("ok ");
+        fflush(stdout);
+
+        if (fgets(input_buf, INPUT_BUF_SIZE, stdin) == NULL) {
+            break; // EOF
+        }
+
+        input_ptr = input_buf;
+        for (char* tok = next_token(); tok != NULL; tok = next_token()) {
+            process_token(tok);
+        }
+    }
+    printf("\n");
+}
+
+// Program entry point: register the built-in words in a fixed order, then run
+// the interactive interpreter until end of input.
+int main(void) {
+    // Registration table; entries are added top to bottom, so later entries
+    // end up nearer the dictionary head.
+    static const struct {
+        const char* name;
+        void (*code)(Word*);
+        uint8_t flags;
+    } primitives[] = {
+        // Internal words (docolon and lit are hidden from lookup)
+        {"exit", do_exit, 0},
+        {"docolon", do_docolon, 1 << 6}, // Hidden
+        {"lit", do_lit, 1 << 6}, // Hidden
+
+        // Public primitives
+        {"dup", do_dup, 0},
+        {"drop", do_drop, 0},
+        {"swap", do_swap, 0},
+        {"over", do_over, 0},
+        {"+", do_add, 0},
+        {"-", do_sub, 0},
+        {"*", do_mul, 0},
+        {"/", do_div, 0},
+        {".", do_dot, 0},
+        {"cr", do_cr, 0},
+
+        // Compilation words
+        {":", do_colon, 0},
+        {";", do_semicolon, 1 << 7}, // Immediate word
+    };
+
+    for (size_t i = 0; i < sizeof primitives / sizeof primitives[0]; i++) {
+        add_primitive(primitives[i].name, primitives[i].code, primitives[i].flags);
+    }
+
+    // Start outer interpreter
+    outer_interpreter();
+    return 0;
+}