feat: add small Forth implementation in C

Co-authored-by: aider (openrouter/tencent/hy3-preview:free) <aider@aider.chat>
This commit is contained in:
2026-05-03 16:27:52 +03:00
commit c9584ccb26
+384
View File
@@ -0,0 +1,384 @@
/*
* Small Forth Implementation in C
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdint.h>
// Configuration: compile-time capacities of the interpreter's fixed-size buffers
// Number of entries in data_stack
#define DATA_STACK_SIZE 256
// Number of entries in ret_stack
#define RET_STACK_SIZE 256
// Number of word headers in dict (primitives and colon definitions together)
#define DICT_SIZE 256
// Number of cells in dict_bodies, shared by the bodies of all colon definitions
#define BODY_SIZE 1024
// Number of cells in compile_buf, i.e. the limit for one definition being compiled
#define COMPILE_BUF_SIZE 1024
// Size in bytes of input_buf, the buffer fgets reads one input line into
#define INPUT_BUF_SIZE 256
// Longest stored word name, not counting the terminating NUL
#define MAX_NAME_LEN 31
// Core types
typedef struct Word Word;
// One slot of a compiled definition: either a reference to the word to
// execute, or the literal number that follows a compiled `lit` reference.
typedef union Cell {
Word* word;
int32_t num;
} Cell;
// Dictionary entry (word header).
struct Word {
// Word defined immediately before this one; NULL for the first word added
Word* prev;
uint8_t flags; // Bit7=immediate, Bit6=hidden, Bits0-5=name length
// NUL-terminated name, truncated to MAX_NAME_LEN characters
char name[MAX_NAME_LEN + 1];
// Handler that runs the word; it is passed the word's own header
void (*code)(Word*);
// Compiled cells of a colon definition; NULL for primitives
Cell* body;
};
// Globals
// Data stack; sp is the index of the top entry and is -1 when the stack is empty
int32_t data_stack[DATA_STACK_SIZE];
int sp = -1;
// Return stack of saved instruction pointers; rp is the index of the top
// entry and is -1 when the stack is empty
Cell* ret_stack[RET_STACK_SIZE];
int rp = -1;
// Instruction pointer: the next cell inner_interpreter will fetch.
// A NULL value ends the inner interpreter loop.
Cell* ip = NULL;
// Word headers, handed out sequentially; dict_idx is the next free slot
Word dict[DICT_SIZE];
int dict_idx = 0;
// Most recently added word; lookups walk from here along each word's prev link
Word* dict_head = NULL;
// Storage for the bodies of finished colon definitions; body_idx is the next free cell
Cell dict_bodies[BODY_SIZE];
int body_idx = 0;
int state = 0; // 0=interpret, 1=compile
// Cells of the definition currently being compiled; compile_idx counts the cells used
Cell compile_buf[COMPILE_BUF_SIZE];
int compile_idx = 0;
// Name read by ':' for the definition in progress
char compiling_name[MAX_NAME_LEN + 1];
// Current input line and the tokenizer's read position inside it
char input_buf[INPUT_BUF_SIZE];
char* input_ptr = NULL;
// Stack helpers
// Push val onto the data stack.
// When the stack is already full, a message is printed and val is discarded.
void data_push(int32_t val) {
    if (sp >= DATA_STACK_SIZE - 1) {
        printf("Data stack overflow\n");
        return;
    }
    sp += 1;
    data_stack[sp] = val;
}
// Pop and return the top of the data stack.
// On an empty stack a message is printed and 0 is returned.
int32_t data_pop(void) {
    if (sp < 0) {
        printf("Data stack underflow\n");
        return 0;
    }
    int32_t top = data_stack[sp];
    sp -= 1;
    return top;
}
// Push a saved instruction pointer onto the return stack.
// When the stack is already full, a message is printed and val is discarded.
void ret_push(Cell* val) {
    if (rp >= RET_STACK_SIZE - 1) {
        printf("Return stack overflow\n");
        return;
    }
    rp += 1;
    ret_stack[rp] = val;
}
// Pop and return the top of the return stack.
// On an empty stack a message is printed and NULL is returned.
Cell* ret_pop(void) {
    if (rp < 0) {
        printf("Return stack underflow\n");
        return NULL;
    }
    Cell* saved = ret_stack[rp];
    rp -= 1;
    return saved;
}
// Dictionary helpers
// Register a primitive word and make it the newest dictionary entry.
//   name  - word name; silently truncated to MAX_NAME_LEN characters
//   code  - handler invoked when the word executes
//   flags - immediate/hidden bits; the stored name length is OR-ed into them
// Returns the new entry, or NULL (after printing a message) when the
// dictionary has no free slot.
Word* add_primitive(const char* name, void (*code)(Word*), uint8_t flags) {
    if (dict_idx >= DICT_SIZE) {
        printf("Dictionary full\n");
        return NULL;
    }

    size_t name_len = strlen(name);
    if (name_len > MAX_NAME_LEN) {
        name_len = MAX_NAME_LEN;
    }

    Word* entry = &dict[dict_idx];
    dict_idx += 1;

    memcpy(entry->name, name, name_len);
    entry->name[name_len] = '\0';
    entry->flags = flags | (uint8_t)name_len;
    entry->code = code;
    entry->body = NULL;

    // Link the entry in front of the previous head.
    entry->prev = dict_head;
    dict_head = entry;
    return entry;
}
// Find the newest visible word whose name equals `name` exactly.
// Entries with the hidden bit (bit 6) set are never returned.
// Returns NULL when there is no match.
Word* lookup_word(const char* name) {
    Word* candidate = dict_head;
    while (candidate != NULL) {
        int hidden = (candidate->flags & (1 << 6)) != 0;
        if (!hidden && strcmp(candidate->name, name) == 0) {
            return candidate;
        }
        candidate = candidate->prev;
    }
    return NULL;
}
// Input tokenizer
// Return the next whitespace-delimited token from the current input line,
// or NULL when no input is set or only whitespace remains.
// The token is NUL-terminated in place inside the input buffer, and
// input_ptr is advanced past it.
char* next_token(void) {
    char* cursor = input_ptr;
    if (cursor == NULL) {
        return NULL;
    }

    // Skip leading whitespace; isspace('\0') is false, so this stops at the end.
    while (isspace((unsigned char)*cursor)) {
        cursor++;
    }
    if (*cursor == '\0') {
        input_ptr = cursor;
        return NULL;
    }

    char* token = cursor;
    while (*cursor != '\0' && !isspace((unsigned char)*cursor)) {
        cursor++;
    }
    // Cut the token off from the rest of the line unless it ends the buffer.
    if (*cursor != '\0') {
        *cursor = '\0';
        cursor++;
    }
    input_ptr = cursor;
    return token;
}
// Primitive word implementations
// dup ( a -- a a ): push a copy of the top data-stack entry.
// An empty stack is reported in the same way data_pop reports it, instead of
// being ignored silently; the stack is left unchanged in that case.
void do_dup(Word* w) {
    (void)w; // handler signature requires the parameter; it is not needed here
    if (sp < 0) {
        printf("Data stack underflow\n");
        return;
    }
    data_push(data_stack[sp]);
}
// drop ( a -- ): discard the top data-stack entry.
// Underflow reporting is left to data_pop.
void do_drop(Word* w) {
    (void)w;
    (void)data_pop();
}
// swap ( a b -- b a ): exchange the two topmost data-stack entries.
// With fewer than two entries the stack is left unchanged and the underflow
// is reported, matching the message data_pop prints.
void do_swap(Word* w) {
    (void)w; // handler signature requires the parameter; it is not needed here
    if (sp < 1) {
        printf("Data stack underflow\n");
        return;
    }
    int32_t top = data_stack[sp];
    data_stack[sp] = data_stack[sp - 1];
    data_stack[sp - 1] = top;
}
// over ( a b -- a b a ): push a copy of the second data-stack entry.
// With fewer than two entries the stack is left unchanged and the underflow
// is reported, matching the message data_pop prints.
void do_over(Word* w) {
    (void)w; // handler signature requires the parameter; it is not needed here
    if (sp < 1) {
        printf("Data stack underflow\n");
        return;
    }
    data_push(data_stack[sp - 1]);
}
// + ( a b -- a+b ): add the two topmost data-stack entries.
// The sum is computed in 64 bits and reduced modulo 2^32, so an overflowing
// addition wraps instead of invoking signed-overflow undefined behavior.
// With fewer than two entries the stack is left unchanged and the underflow
// is reported, matching the message data_pop prints.
void do_add(Word* w) {
    (void)w; // handler signature requires the parameter; it is not needed here
    if (sp < 1) {
        printf("Data stack underflow\n");
        return;
    }
    int32_t b = data_pop();
    int32_t a = data_pop();
    // int64_t cannot overflow here; the uint32_t conversion is the modulo step.
    // Converting back to int32_t is implementation-defined, not undefined.
    data_push((int32_t)(uint32_t)((int64_t)a + b));
}
// - ( a b -- a-b ): subtract the top entry from the one beneath it.
// The difference is computed in 64 bits and reduced modulo 2^32, so an
// overflowing subtraction wraps instead of invoking signed-overflow
// undefined behavior.
// With fewer than two entries the stack is left unchanged and the underflow
// is reported, matching the message data_pop prints.
void do_sub(Word* w) {
    (void)w; // handler signature requires the parameter; it is not needed here
    if (sp < 1) {
        printf("Data stack underflow\n");
        return;
    }
    int32_t b = data_pop();
    int32_t a = data_pop();
    // int64_t cannot overflow here; the uint32_t conversion is the modulo step.
    // Converting back to int32_t is implementation-defined, not undefined.
    data_push((int32_t)(uint32_t)((int64_t)a - b));
}
// * ( a b -- a*b ): multiply the two topmost data-stack entries.
// The product of two 32-bit values always fits in 64 bits; it is then
// reduced modulo 2^32, so an overflowing multiplication wraps instead of
// invoking signed-overflow undefined behavior.
// With fewer than two entries the stack is left unchanged and the underflow
// is reported, matching the message data_pop prints.
void do_mul(Word* w) {
    (void)w; // handler signature requires the parameter; it is not needed here
    if (sp < 1) {
        printf("Data stack underflow\n");
        return;
    }
    int32_t b = data_pop();
    int32_t a = data_pop();
    // Converting back to int32_t is implementation-defined, not undefined.
    data_push((int32_t)(uint32_t)((int64_t)a * b));
}
// / ( a b -- a/b ): divide the second entry by the top entry, truncating
// toward zero as C integer division does.
// - Division by zero prints a message and puts both operands back.
// - INT32_MIN / -1 is not representable and is undefined behavior in C
//   (it traps on some CPUs); it is defined here to wrap to INT32_MIN.
// - With fewer than two entries the stack is left unchanged and the
//   underflow is reported, matching the message data_pop prints.
void do_div(Word* w) {
    (void)w; // handler signature requires the parameter; it is not needed here
    if (sp < 1) {
        printf("Data stack underflow\n");
        return;
    }
    int32_t b = data_pop();
    int32_t a = data_pop();
    if (b == 0) {
        printf("Division by zero\n");
        // Restore the operands in their original order.
        data_push(a);
        data_push(b);
        return;
    }
    if (a == INT32_MIN && b == -1) {
        data_push(INT32_MIN);
        return;
    }
    data_push(a / b);
}
// . ( a -- ): pop the top data-stack entry and print it in decimal followed
// by a space, then flush stdout.
// An empty stack is reported rather than ignored silently. The value is
// widened to long so the printf conversion matches its argument type on
// every platform.
void do_dot(Word* w) {
    (void)w; // handler signature requires the parameter; it is not needed here
    if (sp < 0) {
        printf("Data stack underflow\n");
        return;
    }
    printf("%ld ", (long)data_pop());
    fflush(stdout);
}
// cr ( -- ): write a newline to stdout and flush it.
void do_cr(Word* w) {
    (void)w;
    putchar('\n');
    fflush(stdout);
}
// exit: return from the current colon definition by restoring ip from the
// return stack. A popped NULL makes inner_interpreter stop.
void do_exit(Word* w) {
    (void)w;
    ip = ret_pop();
}
// Handler shared by all colon definitions: save the caller's position on the
// return stack, then continue execution at the first cell of w's body.
void do_docolon(Word* w) {
    Cell* return_addr = ip;
    ip = w->body;
    ret_push(return_addr);
}
// lit: push the number stored in the cell that follows the lit reference.
// The inner interpreter has already stepped past the lit cell, so ip points
// at the number; ip is advanced once more to skip over it.
void do_lit(Word* w) {
    (void)w;
    Cell* literal = ip;
    ip = literal + 1;
    data_push(literal->num);
}
// ':' : begin a new colon definition.
// Reads the next token as the definition's name (kept up to MAX_NAME_LEN
// characters), empties the compile buffer and switches to compile mode.
// Prints a message and changes nothing when no name follows.
void do_colon(Word* w) {
    (void)w;
    char* name = next_token();
    if (name == NULL) {
        printf("':' expects a name\n");
        return;
    }
    // snprintf truncates to the buffer size and always NUL-terminates.
    snprintf(compiling_name, sizeof compiling_name, "%s", name);
    compile_idx = 0;
    state = 1;
}
// ';' (immediate): finish the colon definition being compiled.
//
// Appends a reference to the exit primitive, copies the compiled cells into
// dict_bodies and links a new dictionary entry named compiling_name whose
// handler is do_docolon.
//
// Failure handling:
// - Compile mode is always left, even on failure. Otherwise a full compile
//   buffer would make ';' and every other token fail, with no way back to
//   interpret mode.
// - All capacity checks run before any state is modified, so a failed ';'
//   consumes neither body storage nor a dictionary slot.
// - The exit primitive is found by its handler rather than by name, so a
//   user word that happens to be called "exit" cannot be compiled as the
//   terminator.
void do_semicolon(Word* w) {
    (void)w; // handler signature requires the parameter; it is not needed here
    if (state != 1) {
        printf("';' is only valid in compile mode\n");
        return;
    }
    // From here on the definition is either completed or abandoned.
    state = 0;

    Word* exit_w = dict_head;
    while (exit_w != NULL && exit_w->code != do_exit) {
        exit_w = exit_w->prev;
    }
    if (exit_w == NULL) {
        printf("Fatal: exit word not found\n");
        return;
    }
    if (compile_idx >= COMPILE_BUF_SIZE) {
        printf("Compile buffer overflow\n");
        return;
    }
    // +1 accounts for the exit cell that is appended below.
    if (body_idx + compile_idx + 1 > BODY_SIZE) {
        printf("Dictionary body storage full\n");
        return;
    }
    if (dict_idx >= DICT_SIZE) {
        printf("Dictionary full\n");
        return;
    }

    // Terminate the definition and move it into permanent body storage.
    compile_buf[compile_idx++] = (Cell){.word = exit_w};
    Cell* body = &dict_bodies[body_idx];
    memcpy(body, compile_buf, (size_t)compile_idx * sizeof(Cell));
    body_idx += compile_idx;

    // Create the dictionary entry and make it the newest word.
    Word* new_w = &dict[dict_idx++];
    size_t len = strlen(compiling_name);
    if (len > MAX_NAME_LEN) {
        len = MAX_NAME_LEN;
    }
    memcpy(new_w->name, compiling_name, len);
    new_w->name[len] = '\0';
    new_w->flags = (uint8_t)len; // No hidden, no immediate
    new_w->code = do_docolon;
    new_w->body = body;
    new_w->prev = dict_head;
    dict_head = new_w;
}
// Interpreter functions
void inner_interpreter(void) {
while (ip != NULL) {
Cell current = *ip;
ip++; // Move to next cell
current.word->code(current.word);
}
}
void process_token(const char* token) {
Word* w = lookup_word(token);
if (w != NULL) {
if (state == 0) { // Interpret mode
if (w->code == do_docolon) { // Colon definition
ret_push(NULL); // Return address to stop interpreter
ip = w->body;
inner_interpreter();
} else { // Primitive word
w->code(w);
}
} else { // Compile mode
if (w->flags & (1 << 7)) { // Immediate word: execute now
if (w->code == do_docolon) {
ret_push(NULL);
ip = w->body;
inner_interpreter();
} else {
w->code(w);
}
} else { // Normal word: compile into current definition
if (compile_idx >= COMPILE_BUF_SIZE) {
printf("Compile buffer full\n");
return;
}
compile_buf[compile_idx++] = (Cell){.word = w};
}
}
} else { // Not a known word: try to parse as number
char* end;
long v = strtol(token, &end, 10);
if (end != token && *end == '\0') { // Valid integer
if (state == 0) { // Interpret mode: push number
data_push((int32_t)v);
} else { // Compile mode: compile lit + number
Word* lit_w = lookup_word("lit");
if (lit_w == NULL) {
printf("Fatal: lit word not found\n");
return;
}
if (compile_idx + 2 > COMPILE_BUF_SIZE) {
printf("Compile buffer full\n");
return;
}
compile_buf[compile_idx++] = (Cell){.word = lit_w};
compile_buf[compile_idx++] = (Cell){.num = (int32_t)v};
}
} else {
printf("Unknown word: '%s'\n", token);
}
}
}
void outer_interpreter(void) {
while (1) {
printf("ok ");
fflush(stdout);
if (fgets(input_buf, INPUT_BUF_SIZE, stdin) == NULL) {
break; // EOF
}
input_ptr = input_buf;
char* tok;
while ((tok = next_token()) != NULL) {
process_token(tok);
}
}
printf("\n");
}
int main(void) {
// Register primitive words
// Hidden words first
add_primitive("exit", do_exit, 0);
add_primitive("docolon", do_docolon, 1 << 6); // Hidden
add_primitive("lit", do_lit, 1 << 6); // Hidden
// Public primitives
add_primitive("dup", do_dup, 0);
add_primitive("drop", do_drop, 0);
add_primitive("swap", do_swap, 0);
add_primitive("over", do_over, 0);
add_primitive("+", do_add, 0);
add_primitive("-", do_sub, 0);
add_primitive("*", do_mul, 0);
add_primitive("/", do_div, 0);
add_primitive(".", do_dot, 0);
add_primitive("cr", do_cr, 0);
// Compilation words
add_primitive(":", do_colon, 0);
add_primitive(";", do_semicolon, 1 << 7); // Immediate word
// Start outer interpreter
outer_interpreter();
return 0;
}