ai-forth-experiment/forth.c

/*
 * Small Forth Implementation in C
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdint.h>

// Configuration
#define DATA_STACK_SIZE 256
#define RET_STACK_SIZE 256
#define DICT_SIZE 256
#define BODY_SIZE 1024
#define COMPILE_BUF_SIZE 1024
#define INPUT_BUF_SIZE 256
#define MAX_NAME_LEN 31

// Core types
typedef struct Word Word;
typedef union Cell {
    Word* word;
    int32_t num;
} Cell;

struct Word {
    Word* prev;
    uint8_t flags; // Bit7=immediate, Bit6=hidden, Bits0-5=name length
    char name[MAX_NAME_LEN + 1];
    void (*code)(Word*);
    Cell* body;
};

// Globals
int32_t data_stack[DATA_STACK_SIZE];
int sp = -1;
Cell* ret_stack[RET_STACK_SIZE];
int rp = -1;
Cell* ip = NULL;

Word dict[DICT_SIZE];
int dict_idx = 0;
Word* dict_head = NULL;
Cell dict_bodies[BODY_SIZE];
int body_idx = 0;

int state = 0; // 0=interpret, 1=compile
Cell compile_buf[COMPILE_BUF_SIZE];
int compile_idx = 0;
char compiling_name[MAX_NAME_LEN + 1];

char input_buf[INPUT_BUF_SIZE];
char* input_ptr = NULL;

// Stack helpers
void data_push(int32_t val) {
    if (sp < DATA_STACK_SIZE - 1) {
        data_stack[++sp] = val;
    } else {
        printf("Data stack overflow\n");
    }
}

int32_t data_pop(void) {
    if (sp >= 0) {
        return data_stack[sp--];
    } else {
        printf("Data stack underflow\n");
        return 0;
    }
}

void ret_push(Cell* val) {
    if (rp < RET_STACK_SIZE - 1) {
        ret_stack[++rp] = val;
    } else {
        printf("Return stack overflow\n");
    }
}

Cell* ret_pop(void) {
    if (rp >= 0) {
        return ret_stack[rp--];
    } else {
        printf("Return stack underflow\n");
        return NULL;
    }
}

// Dictionary helpers
Word* add_primitive(const char* name, void (*code)(Word*), uint8_t flags) {
    if (dict_idx >= DICT_SIZE) {
        printf("Dictionary full\n");
        return NULL;
    }
    Word* w = &dict[dict_idx++];
    w->prev = dict_head;
    dict_head = w;

    size_t len = strlen(name);
    if (len > MAX_NAME_LEN) len = MAX_NAME_LEN;
    w->flags = flags | (uint8_t)len;
    strncpy(w->name, name, len);
    w->name[len] = '\0';
    w->code = code;
    w->body = NULL;
    return w;
}

Word* lookup_word(const char* name) {
    for (Word* w = dict_head; w != NULL; w = w->prev) {
        if (w->flags & (1 << 6)) continue; // Skip hidden words
        if (strcmp(w->name, name) == 0) return w;
    }
    return NULL;
}

// Input tokenizer
char* next_token(void) {
    if (input_ptr == NULL) return NULL;
    while (*input_ptr != '\0' && isspace((unsigned char)*input_ptr)) {
        input_ptr++;
    }
    if (*input_ptr == '\0') return NULL;
    char* start = input_ptr;
    while (*input_ptr != '\0' && !isspace((unsigned char)*input_ptr)) {
        input_ptr++;
    }
    if (*input_ptr != '\0') {
        *input_ptr = '\0';
        input_ptr++;
    }
    return start;
}

// Primitive word implementations
void do_dup(Word* w) {
    if (sp < 0) return;
    int32_t v = data_stack[sp];
    data_push(v);
}

void do_drop(Word* w) {
    data_pop();
}

void do_swap(Word* w) {
    if (sp < 1) return;
    int32_t a = data_stack[sp-1];
    int32_t b = data_stack[sp];
    data_stack[sp-1] = b;
    data_stack[sp] = a;
}

void do_over(Word* w) {
    if (sp < 1) return;
    data_push(data_stack[sp-1]);
}

void do_add(Word* w) {
    if (sp < 1) return;
    int32_t b = data_pop();
    int32_t a = data_pop();
    data_push(a + b);
}

void do_sub(Word* w) {
    if (sp < 1) return;
    int32_t b = data_pop();
    int32_t a = data_pop();
    data_push(a - b);
}

void do_mul(Word* w) {
    if (sp < 1) return;
    int32_t b = data_pop();
    int32_t a = data_pop();
    data_push(a * b);
}

void do_div(Word* w) {
    if (sp < 1) return;
    int32_t b = data_pop();
    int32_t a = data_pop();
    if (b == 0) {
        printf("Division by zero\n");
        data_push(a);
        data_push(b);
        return;
    }
    data_push(a / b);
}

void do_dot(Word* w) {
    if (sp < 0) return;
    printf("%d ", data_pop());
    fflush(stdout);
}

void do_cr(Word* w) {
    printf("\n");
    fflush(stdout);
}

void do_exit(Word* w) {
    Cell* ret_addr = ret_pop();
    ip = ret_addr;
}

void do_docolon(Word* w) {
    // Push current ip (return address) onto return stack
    ret_push(ip);
    // Set ip to this word's body
    ip = w->body;
}

void do_lit(Word* w) {
    // ip points to the number cell (inner interpreter already incremented past lit word)
    data_push(ip->num);
    ip++; // Move past number cell
}

void do_colon(Word* w) {
    char* name = next_token();
    if (name == NULL) {
        printf("':' expects a name\n");
        return;
    }
    strncpy(compiling_name, name, MAX_NAME_LEN);
    compiling_name[MAX_NAME_LEN] = '\0';
    state = 1; // Enter compile mode
    compile_idx = 0; // Reset compile buffer
}

void do_semicolon(Word* w) {
    if (state != 1) {
        printf("';' is only valid in compile mode\n");
        return;
    }
    Word* exit_w = lookup_word("exit");
    if (exit_w == NULL) {
        printf("Fatal: exit word not found\n");
        return;
    }
    if (compile_idx >= COMPILE_BUF_SIZE) {
        printf("Compile buffer overflow\n");
        return;
    }
    compile_buf[compile_idx++] = (Cell){.word = exit_w};

    // Copy compiled body to dictionary body storage
    if (body_idx + compile_idx > BODY_SIZE) {
        printf("Dictionary body storage full\n");
        return;
    }
    memcpy(&dict_bodies[body_idx], compile_buf, compile_idx * sizeof(Cell));

    // Create new word entry
    if (dict_idx >= DICT_SIZE) {
        printf("Dictionary full\n");
        return;
    }
    Word* new_w = &dict[dict_idx++];
    new_w->prev = dict_head;
    dict_head = new_w;

    size_t len = strlen(compiling_name);
    if (len > MAX_NAME_LEN) len = MAX_NAME_LEN;
    new_w->flags = (uint8_t)len; // No hidden, no immediate
    strncpy(new_w->name, compiling_name, len);
    new_w->name[len] = '\0';
    new_w->code = do_docolon;
    new_w->body = &dict_bodies[body_idx];

    body_idx += compile_idx;
    state = 0; // Back to interpret mode
}

// Interpreter functions
void inner_interpreter(void) {
    while (ip != NULL) {
        Cell current = *ip;
        ip++; // Move to next cell
        current.word->code(current.word);
    }
}

void process_token(const char* token) {
    Word* w = lookup_word(token);
    if (w != NULL) {
        if (state == 0) { // Interpret mode
            if (w->code == do_docolon) { // Colon definition
                ret_push(NULL); // Return address to stop interpreter
                ip = w->body;
                inner_interpreter();
            } else { // Primitive word
                w->code(w);
            }
        } else { // Compile mode
            if (w->flags & (1 << 7)) { // Immediate word: execute now
                if (w->code == do_docolon) {
                    ret_push(NULL);
                    ip = w->body;
                    inner_interpreter();
                } else {
                    w->code(w);
                }
            } else { // Normal word: compile into current definition
                if (compile_idx >= COMPILE_BUF_SIZE) {
                    printf("Compile buffer full\n");
                    return;
                }
                compile_buf[compile_idx++] = (Cell){.word = w};
            }
        }
    } else { // Not a known word: try to parse as number
        char* end;
        long v = strtol(token, &end, 10);
        if (end != token && *end == '\0') { // Valid integer
            if (state == 0) { // Interpret mode: push number
                data_push((int32_t)v);
            } else { // Compile mode: compile lit + number
                Word* lit_w = lookup_word("lit");
                if (lit_w == NULL) {
                    printf("Fatal: lit word not found\n");
                    return;
                }
                if (compile_idx + 2 > COMPILE_BUF_SIZE) {
                    printf("Compile buffer full\n");
                    return;
                }
                compile_buf[compile_idx++] = (Cell){.word = lit_w};
                compile_buf[compile_idx++] = (Cell){.num = (int32_t)v};
            }
        } else {
            printf("Unknown word: '%s'\n", token);
        }
    }
}

void outer_interpreter(void) {
    while (1) {
        printf("ok ");
        fflush(stdout);
        if (fgets(input_buf, INPUT_BUF_SIZE, stdin) == NULL) {
            break; // EOF
        }
        input_ptr = input_buf;
        char* tok;
        while ((tok = next_token()) != NULL) {
            process_token(tok);
        }
    }
    printf("\n");
}

int main(void) {
    // Register primitive words
    // Hidden words first
    add_primitive("exit", do_exit, 0);
    add_primitive("docolon", do_docolon, 1 << 6); // Hidden
    add_primitive("lit", do_lit, 1 << 6); // Hidden

    // Public primitives
    add_primitive("dup", do_dup, 0);
    add_primitive("drop", do_drop, 0);
    add_primitive("swap", do_swap, 0);
    add_primitive("over", do_over, 0);
    add_primitive("+", do_add, 0);
    add_primitive("-", do_sub, 0);
    add_primitive("*", do_mul, 0);
    add_primitive("/", do_div, 0);
    add_primitive(".", do_dot, 0);
    add_primitive("cr", do_cr, 0);

    // Compilation words
    add_primitive(":", do_colon, 0);
    add_primitive(";", do_semicolon, 1 << 7); // Immediate word

    // Start outer interpreter
    outer_interpreter();
    return 0;
}