fix: correct Cell union and add interpreter, main, word implementations

Co-authored-by: aider (openrouter/tencent/hy3-preview:free) <aider@aider.chat>
This commit is contained in:
2026-05-03 16:59:53 +03:00
parent 0d9b7e3424
commit e5c6fdf9d0
4 changed files with 383 additions and 2 deletions
+1 -1
View File
@@ -24,7 +24,7 @@ typedef struct Word Word;
// One cell of threaded code or of a stack. The same storage is read as
// whichever member the consumer expects:
//   word     - a dictionary entry to execute
//   num      - a 32-bit integer value (inline literal, branch offset, character)
//   cell_ptr - a saved instruction pointer kept on the return stack
typedef union Cell {
    Word* word;
    int32_t num;
    // The typedef name `Cell` only exists once this declaration is complete,
    // so the self-reference has to go through the union tag.
    union Cell* cell_ptr;
} Cell;
struct Word {
+97
View File
@@ -0,0 +1,97 @@
#include "forth.h"

#include <errno.h>
// Input tokenizer
// Return the next whitespace-delimited token of the current input line.
//
// Scanning starts at the global input_ptr. Leading whitespace is skipped, and
// the token is terminated in place by overwriting the delimiter that follows
// it with '\0'. On return input_ptr sits just past that delimiter, or on the
// final '\0' when the token runs to the end of the buffer.
//
// Returns a pointer into the input buffer, or NULL when input_ptr is NULL or
// nothing but whitespace remains.
char* next_token(void) {
    if (input_ptr == NULL) {
        return NULL;
    }

    // Step over leading whitespace.
    for (; *input_ptr != '\0'; input_ptr++) {
        if (!isspace((unsigned char)*input_ptr)) {
            break;
        }
    }
    if (*input_ptr == '\0') {
        return NULL;
    }

    // The first character is known to belong to the token, so advance at
    // least once before testing for the end of it.
    char* token = input_ptr;
    do {
        input_ptr++;
    } while (*input_ptr != '\0' && !isspace((unsigned char)*input_ptr));

    // Cut the token off from the rest of the line unless it ends the buffer.
    if (*input_ptr != '\0') {
        *input_ptr++ = '\0';
    }
    return token;
}
// Interpreter functions
void inner_interpreter(void) {
while (ip != NULL) {
Cell current = *ip;
ip++; // Move to next cell
current.word->code(current.word);
}
}
// Execute one dictionary word right now.
//
// A colon definition (code == do_docolon) is run through the inner
// interpreter; a NULL return address is pushed first so that the definition's
// final exit sets ip to NULL and stops the loop. The sentinel is pushed with
// ret_push_ip because do_exit reads it back with ret_pop_ip.
// Any other word is a primitive and is called directly.
static void execute_word(Word* w) {
    if (w->code == do_docolon) {
        ret_push_ip(NULL);
        ip = w->body;
        inner_interpreter();
    } else {
        w->code(w);
    }
}

// Handle a single token from the input line.
//
// Known word:
//   - interpret mode (state == 0), or the word has the immediate bit
//     (1 << 7) set: execute it now;
//   - otherwise: append it to compile_buf.
// Unknown word: parse it as a base-10 integer. In interpret mode the value is
// pushed on the data stack; in compile mode `lit` followed by the value is
// appended to compile_buf.
//
// Problems are reported on stdout and the token is dropped: a full compile
// buffer, a missing `lit` word, a number that does not fit in int32_t, or a
// token that is neither a word nor a number.
void process_token(const char* token) {
    Word* w = lookup_word(token);
    if (w != NULL) {
        if (state == 0 || (w->flags & (1 << 7))) {
            execute_word(w);
            return;
        }
        if (compile_idx >= COMPILE_BUF_SIZE) {
            printf("Compile buffer full\n");
            return;
        }
        compile_buf[compile_idx++] = (Cell){.word = w};
        return;
    }

    // Not in the dictionary: the whole token must be a valid integer.
    char* end;
    errno = 0;
    long v = strtol(token, &end, 10);
    if (end == token || *end != '\0') {
        printf("Unknown word: '%s'\n", token);
        return;
    }
    // Cells hold int32_t; reject anything the cast below would truncate.
    // errno covers platforms where long itself is only 32 bits wide.
    if (errno == ERANGE || v < INT32_MIN || v > INT32_MAX) {
        printf("Number out of range: '%s'\n", token);
        return;
    }

    if (state == 0) {
        data_push((int32_t)v);
        return;
    }

    Word* lit_w = lookup_word("lit");
    if (lit_w == NULL) {
        printf("Fatal: lit word not found\n");
        return;
    }
    // A literal takes two cells: the lit word and the value it pushes.
    if (compile_idx + 2 > COMPILE_BUF_SIZE) {
        printf("Compile buffer full\n");
        return;
    }
    compile_buf[compile_idx++] = (Cell){.word = lit_w};
    compile_buf[compile_idx++] = (Cell){.num = (int32_t)v};
}
void outer_interpreter(void) {
while (1) {
printf("ok ");
fflush(stdout);
if (fgets(input_buf, INPUT_BUF_SIZE, stdin) == NULL) {
break; // EOF
}
input_ptr = input_buf;
char* tok;
while ((tok = next_token()) != NULL) {
process_token(tok);
}
}
printf("\n");
}
+211 -1
View File
@@ -293,4 +293,214 @@ void do_key(Word* w) {
void do_dot_quote(Word* w) {
// Immediate word: parse string until " and print/compile
if (state == 0) { // Interpret mode: print immediately
//
if (input_ptr == NULL) {
printf("Missing string\n");
return;
}
// Skip whitespace before opening "
while (*input_ptr != '\0' && isspace((unsigned char)*input_ptr)) {
input_ptr++;
}
if (*input_ptr != '"') {
printf("Expected \" to start string\n");
return;
}
input_ptr++; // Skip opening "
char* start = input_ptr;
// Find closing "
while (*input_ptr != '\0' && *input_ptr != '"') {
input_ptr++;
}
if (*input_ptr != '"') {
printf("Unterminated string\n");
return;
}
// Print the string
while (start < input_ptr) {
putchar(*start++);
}
input_ptr++; // Skip closing "
fflush(stdout);
} else { // Compile mode: compile string for runtime
if (input_ptr == NULL) {
printf("Missing string\n");
return;
}
// Skip whitespace before opening "
while (*input_ptr != '\0' && isspace((unsigned char)*input_ptr)) {
input_ptr++;
}
if (*input_ptr != '"') {
printf("Expected \" to start string\n");
return;
}
input_ptr++; // Skip opening "
char* start = input_ptr;
// Find closing "
while (*input_ptr != '\0' && *input_ptr != '"') {
input_ptr++;
}
if (*input_ptr != '"') {
printf("Unterminated string\n");
return;
}
size_t len = input_ptr - start;
// Compile do_dot_quote_inner
Word* inner_w = lookup_word("do_dot_quote_inner");
if (inner_w == NULL) {
printf("Fatal: do_dot_quote_inner not found\n");
return;
}
if (compile_idx + 2 + len > COMPILE_BUF_SIZE) {
printf("Compile buffer full\n");
return;
}
compile_buf[compile_idx++] = (Cell){.word = inner_w};
compile_buf[compile_idx++] = (Cell){.num = (int32_t)len};
// Store string characters in compile buffer (each as a num cell)
for (size_t i = 0; i < len; i++) {
compile_buf[compile_idx++] = (Cell){.num = (int32_t)start[i]};
}
input_ptr++; // Skip closing "
}
}
// Runtime part of a compiled string: print the characters stored inline.
//
// On entry ip addresses a length cell; that many cells follow, each holding
// one character in its num member. Every cell read is stepped over, so ip
// ends up on the cell after the string. stdout is flushed afterwards.
void do_dot_quote_inner(Word* w) {
    int32_t remaining = ip->num;
    ip++;

    for (; remaining > 0; remaining--) {
        putchar((char)ip->num);
        ip++;
    }
    fflush(stdout);
}
// Print the names of all dictionary entries that are not hidden.
//
// Walks the list from dict_head through the prev links, skipping any entry
// whose flags have bit 6 set. Names are printed separated by spaces under a
// heading line, followed by a newline, and stdout is flushed.
void do_words(Word* w) {
    printf("Dictionary words:\n");

    Word* entry = dict_head;
    while (entry != NULL) {
        if (!(entry->flags & (1 << 6))) {
            printf("%s ", entry->name);
        }
        entry = entry->prev;
    }

    printf("\n");
    fflush(stdout);
}
// Control flow operations
// Return from the current definition: the instruction pointer is replaced by
// the address popped from the return stack.
void do_exit(Word* w) {
    ip = ret_pop_ip();
}
// Enter a colon definition: save where execution should resume on the return
// stack, then continue at the first cell of the word's body.
void do_docolon(Word* w) {
    Cell* resume_at = ip;
    ret_push_ip(resume_at);
    ip = w->body;
}
// Push an inline literal onto the data stack.
//
// On entry ip addresses the cell holding the number; afterwards ip is moved
// one cell forward so the number is not treated as a word.
void do_lit(Word* w) {
    int32_t literal = ip->num;
    data_push(literal);
    ip++;
}
// Begin a new definition.
//
// Takes the next input token as the name, stores at most MAX_NAME_LEN
// characters of it in compiling_name, empties the compile buffer and switches
// to compile mode (state = 1). Prints a message and changes nothing when no
// name follows.
void do_colon(Word* w) {
    char* word_name = next_token();
    if (word_name != NULL) {
        strncpy(compiling_name, word_name, MAX_NAME_LEN);
        compiling_name[MAX_NAME_LEN] = '\0';  // strncpy may not terminate
        compile_idx = 0;
        state = 1;
    } else {
        printf("':' expects a name\n");
    }
}
// Finish the definition started by ':'.
//
// Appends `exit` to the compiled cells, copies them into dict_bodies, and
// creates a dictionary entry named compiling_name whose code is do_docolon
// and whose body is the copied cells. Then returns to interpret mode.
//
// All capacity checks run before anything is modified. If one fails the
// message is printed and the definition is abandoned: the compile buffer is
// emptied and state goes back to 0. Staying in compile mode would leave no
// way out, because ':' is compiled rather than executed there and ';' would
// keep failing for the same reason.
void do_semicolon(Word* w) {
    (void)w;

    if (state != 1) {
        printf("';' is only valid in compile mode\n");
        return;
    }

    Word* exit_w = lookup_word("exit");
    const char* failure = NULL;
    if (exit_w == NULL) {
        failure = "Fatal: exit word not found\n";
    } else if (compile_idx >= COMPILE_BUF_SIZE) {
        // No room left for the trailing exit cell.
        failure = "Compile buffer overflow\n";
    } else if (body_idx + compile_idx + 1 > BODY_SIZE) {
        // +1 accounts for the exit cell that is about to be appended.
        failure = "Dictionary body storage full\n";
    } else if (dict_idx >= DICT_SIZE) {
        failure = "Dictionary full\n";
    }
    if (failure != NULL) {
        printf("%s", failure);
        compile_idx = 0;
        state = 0;
        return;
    }

    // Terminate the body and move it into permanent storage.
    compile_buf[compile_idx++] = (Cell){.word = exit_w};
    memcpy(&dict_bodies[body_idx], compile_buf, compile_idx * sizeof(Cell));

    // Fill in the new entry completely before linking it into the list.
    Word* new_w = &dict[dict_idx++];
    size_t len = strlen(compiling_name);
    if (len > MAX_NAME_LEN) {
        len = MAX_NAME_LEN;
    }
    new_w->flags = (uint8_t)len;  // No hidden, no immediate
    memcpy(new_w->name, compiling_name, len);
    new_w->name[len] = '\0';
    new_w->code = do_docolon;
    new_w->body = &dict_bodies[body_idx];
    body_idx += compile_idx;

    new_w->prev = dict_head;
    dict_head = new_w;

    state = 0;  // Back to interpret mode
}
// Unconditional branch.
//
// On entry ip addresses the cell holding the offset. The offset is added to
// ip as it stands, so it is counted in cells from the offset cell itself.
void do_branch(Word* w) {
    ip += ip->num;
}
// Conditional branch: pops a value from the data stack and branches when it
// is zero.
//
// On entry ip addresses the cell holding the offset. When the popped value is
// zero the offset is added to ip; otherwise ip just steps over the offset
// cell.
void do_zero_branch(Word* w) {
    int32_t flag = data_pop();
    ip += (flag == 0) ? ip->num : 1;
}
// Memory operations: placeholders only. Every function in this group has an
// empty body, so calling one has no effect. TODO: implement.
void do_fetch(Word* w) { /* TODO */ }
void do_store(Word* w) { /* TODO */ }
void do_plus_store(Word* w) { /* TODO */ }
void do_cfetch(Word* w) { /* TODO */ }
void do_cstore(Word* w) { /* TODO */ }
void do_variable(Word* w) { /* TODO */ }
void do_constant(Word* w) { /* TODO */ }
void do_do_var(Word* w) { /* TODO */ }
void do_do_const(Word* w) { /* TODO */ }
void do_here(Word* w) { /* TODO */ }
void do_allot(Word* w) { /* TODO */ }
// Return stack operations: placeholders only. The bodies are empty, so these
// functions currently do nothing. TODO: implement.
void do_to_r(Word* w) { /* TODO */ }
void do_r_from(Word* w) { /* TODO */ }
void do_r_fetch(Word* w) { /* TODO */ }
// Additional control flow words: placeholders only. The bodies are empty, so
// these functions currently do nothing and compile no branch cells.
// TODO: implement.
void do_if(Word* w) { /* TODO */ }
void do_else(Word* w) { /* TODO */ }
void do_then(Word* w) { /* TODO */ }
void do_begin(Word* w) { /* TODO */ }
void do_until(Word* w) { /* TODO */ }
void do_while(Word* w) { /* TODO */ }
void do_repeat(Word* w) { /* TODO */ }
+74
View File
@@ -0,0 +1,74 @@
#include "forth.h"
int main(void) {
// Register primitive words
// Hidden words first
add_primitive("exit", do_exit, 0);
add_primitive("docolon", do_docolon, 1 << 6); // Hidden
add_primitive("lit", do_lit, 1 << 6); // Hidden
add_primitive("do_dot_quote_inner", do_dot_quote_inner, 1 << 6); // Hidden
add_primitive("0branch", do_zero_branch, 1 << 6); // Hidden (for IF)
add_primitive("branch", do_branch, 1 << 6); // Hidden (for ELSE, BEGIN)
// Public primitives
// Stack ops
add_primitive("dup", do_dup, 0);
add_primitive("drop", do_drop, 0);
add_primitive("swap", do_swap, 0);
add_primitive("over", do_over, 0);
add_primitive("rot", do_rot, 0);
add_primitive("minus-rot", do_minus_rot, 0);
add_primitive("nip", do_nip, 0);
add_primitive("tuck", do_tuck, 0);
// Arithmetic
add_primitive("+", do_add, 0);
add_primitive("-", do_sub, 0);
add_primitive("*", do_mul, 0);
add_primitive("/", do_div, 0);
add_primitive("mod", do_mod, 0);
add_primitive("/mod", do_slash_mod, 0);
add_primitive("1+", do_one_plus, 0);
add_primitive("1-", do_one_minus, 0);
add_primitive("2+", do_two_plus, 0);
add_primitive("2-", do_two_minus, 0);
add_primitive("negate", do_negate, 0);
add_primitive("abs", do_abs, 0);
add_primitive("min", do_min, 0);
add_primitive("max", do_max, 0);
// Logic
add_primitive("and", do_and, 0);
add_primitive("or", do_or, 0);
add_primitive("xor", do_xor, 0);
add_primitive("invert", do_invert, 0);
add_primitive("lshift", do_lshift, 0);
add_primitive("rshift", do_rshift, 0);
// Comparison
add_primitive("=", do_eq, 0);
add_primitive("<>", do_neq, 0);
add_primitive("<", do_lt, 0);
add_primitive(">", do_gt, 0);
add_primitive("<=", do_lte, 0);
add_primitive(">=", do_gte, 0);
add_primitive("0=", do_zero_eq, 0);
add_primitive("0<", do_zero_lt, 0);
add_primitive("0>", do_zero_gt, 0);
// I/O
add_primitive(".", do_dot, 0);
add_primitive("cr", do_cr, 0);
add_primitive("emit", do_emit, 0);
add_primitive("key", do_key, 0);
add_primitive(".\"", do_dot_quote, 1 << 7); // Immediate
add_primitive("words", do_words, 0);
// Compilation words
add_primitive(":", do_colon, 0);
add_primitive(";", do_semicolon, 1 << 7); // Immediate
// Start outer interpreter
outer_interpreter();
return 0;
}