c9584ccb26
Co-authored-by: aider (openrouter/tencent/hy3-preview:free) <aider@aider.chat>
385 lines
9.3 KiB
C
385 lines
9.3 KiB
C
/*
 * Small Forth Implementation in C
 */
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include <stdint.h>
|
|
|
|
// Configuration: fixed capacities of the statically allocated regions.
#define DATA_STACK_SIZE  256   // elements in data_stack
#define RET_STACK_SIZE   256   // elements in ret_stack
#define DICT_SIZE        256   // Word entries in dict
#define BODY_SIZE        1024  // cells in dict_bodies (colon-definition bodies)
#define COMPILE_BUF_SIZE 1024  // cells in compile_buf (definition in progress)
#define INPUT_BUF_SIZE   256   // bytes in input_buf (one input line)
#define MAX_NAME_LEN     31    // stored name length, terminator not included
|
|
|
|
// Core types
|
|
typedef struct Word Word;
|
|
typedef union Cell {
|
|
Word* word;
|
|
int32_t num;
|
|
} Cell;
|
|
|
|
struct Word {
|
|
Word* prev;
|
|
uint8_t flags; // Bit7=immediate, Bit6=hidden, Bits0-5=name length
|
|
char name[MAX_NAME_LEN + 1];
|
|
void (*code)(Word*);
|
|
Cell* body;
|
|
};
|
|
|
|
// Globals
|
|
int32_t data_stack[DATA_STACK_SIZE];
|
|
int sp = -1;
|
|
Cell* ret_stack[RET_STACK_SIZE];
|
|
int rp = -1;
|
|
Cell* ip = NULL;
|
|
|
|
Word dict[DICT_SIZE];
|
|
int dict_idx = 0;
|
|
Word* dict_head = NULL;
|
|
Cell dict_bodies[BODY_SIZE];
|
|
int body_idx = 0;
|
|
|
|
int state = 0; // 0=interpret, 1=compile
|
|
Cell compile_buf[COMPILE_BUF_SIZE];
|
|
int compile_idx = 0;
|
|
char compiling_name[MAX_NAME_LEN + 1];
|
|
|
|
char input_buf[INPUT_BUF_SIZE];
|
|
char* input_ptr = NULL;
|
|
|
|
// Stack helpers
|
|
void data_push(int32_t val) {
|
|
if (sp < DATA_STACK_SIZE - 1) {
|
|
data_stack[++sp] = val;
|
|
} else {
|
|
printf("Data stack overflow\n");
|
|
}
|
|
}
|
|
|
|
int32_t data_pop(void) {
|
|
if (sp >= 0) {
|
|
return data_stack[sp--];
|
|
} else {
|
|
printf("Data stack underflow\n");
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
// Push a saved instruction pointer onto the return stack. When the stack is
// already full the value is dropped and "Return stack overflow" is printed.
void ret_push(Cell* val) {
    if (rp >= RET_STACK_SIZE - 1) {
        printf("Return stack overflow\n");
        return;
    }
    rp += 1;
    ret_stack[rp] = val;
}
|
|
|
|
Cell* ret_pop(void) {
|
|
if (rp >= 0) {
|
|
return ret_stack[rp--];
|
|
} else {
|
|
printf("Return stack underflow\n");
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
// Dictionary helpers
|
|
// Append a new entry to the dictionary and make it the head of the chain.
//
// name   Word name; only the first MAX_NAME_LEN characters are stored.
// code   Routine to run when the word executes.
// flags  Flag bits (Bit7=immediate, Bit6=hidden); the stored name length is
//        OR-ed into the low bits.
//
// Returns the new entry, or NULL (after printing "Dictionary full") when
// dict[] has no free slot. The entry's body is initialised to NULL.
Word* add_primitive(const char* name, void (*code)(Word*), uint8_t flags) {
    if (dict_idx >= DICT_SIZE) {
        printf("Dictionary full\n");
        return NULL;
    }

    Word* entry = &dict[dict_idx];
    dict_idx += 1;

    // Link at the front so lookups see the newest definition first.
    entry->prev = dict_head;
    dict_head = entry;

    size_t name_len = strlen(name);
    if (name_len > MAX_NAME_LEN) {
        name_len = MAX_NAME_LEN;
    }
    memcpy(entry->name, name, name_len);
    entry->name[name_len] = '\0';

    entry->flags = flags | (uint8_t)name_len;
    entry->code = code;
    entry->body = NULL;
    return entry;
}
|
|
|
|
Word* lookup_word(const char* name) {
|
|
for (Word* w = dict_head; w != NULL; w = w->prev) {
|
|
if (w->flags & (1 << 6)) continue; // Skip hidden words
|
|
if (strcmp(w->name, name) == 0) return w;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
// Input tokenizer
|
|
char* next_token(void) {
|
|
if (input_ptr == NULL) return NULL;
|
|
while (*input_ptr != '\0' && isspace((unsigned char)*input_ptr)) {
|
|
input_ptr++;
|
|
}
|
|
if (*input_ptr == '\0') return NULL;
|
|
char* start = input_ptr;
|
|
while (*input_ptr != '\0' && !isspace((unsigned char)*input_ptr)) {
|
|
input_ptr++;
|
|
}
|
|
if (*input_ptr != '\0') {
|
|
*input_ptr = '\0';
|
|
input_ptr++;
|
|
}
|
|
return start;
|
|
}
|
|
|
|
// Primitive word implementations
|
|
// dup ( a -- a a ): duplicate the top of the data stack.
// Does nothing, silently, when the stack is empty.
void do_dup(Word* w) {
    (void)w;
    if (sp >= 0) {
        data_push(data_stack[sp]);
    }
}
|
|
|
|
// drop ( a -- ): discard the top of the data stack. An empty stack is
// reported by data_pop.
void do_drop(Word* w) {
    (void)w;
    (void)data_pop();
}
|
|
|
|
// swap ( a b -- b a ): exchange the two topmost data stack elements.
// Does nothing, silently, when fewer than two elements are present.
void do_swap(Word* w) {
    (void)w;
    if (sp >= 1) {
        int32_t top = data_stack[sp];
        data_stack[sp] = data_stack[sp - 1];
        data_stack[sp - 1] = top;
    }
}
|
|
|
|
// over ( a b -- a b a ): push a copy of the second element from the top.
// Does nothing, silently, when fewer than two elements are present.
void do_over(Word* w) {
    (void)w;
    if (sp >= 1) {
        int32_t second = data_stack[sp - 1];
        data_push(second);
    }
}
|
|
|
|
// + ( a b -- a+b ): replace the two topmost elements with their sum.
// Does nothing, silently, when fewer than two elements are present.
void do_add(Word* w) {
    (void)w;
    if (sp < 1) return;
    int32_t b = data_pop();
    int32_t a = data_pop();
    // Signed overflow is undefined behaviour in C, so the sum is computed in
    // unsigned arithmetic, which wraps modulo 2^32, and then converted back.
    data_push((int32_t)((uint32_t)a + (uint32_t)b));
}
|
|
|
|
// - ( a b -- a-b ): replace the two topmost elements with their difference.
// Does nothing, silently, when fewer than two elements are present.
void do_sub(Word* w) {
    (void)w;
    if (sp < 1) return;
    int32_t b = data_pop();
    int32_t a = data_pop();
    // Signed overflow is undefined behaviour in C, so the difference is
    // computed in unsigned arithmetic, which wraps modulo 2^32.
    data_push((int32_t)((uint32_t)a - (uint32_t)b));
}
|
|
|
|
// * ( a b -- a*b ): replace the two topmost elements with their product.
// Does nothing, silently, when fewer than two elements are present.
void do_mul(Word* w) {
    (void)w;
    if (sp < 1) return;
    int32_t b = data_pop();
    int32_t a = data_pop();
    // Signed overflow is undefined behaviour in C, so the product is computed
    // in unsigned arithmetic; the low 32 bits are the same for signed and
    // unsigned multiplication.
    data_push((int32_t)((uint32_t)a * (uint32_t)b));
}
|
|
|
|
// / ( a b -- a/b ): replace the two topmost elements with their quotient
// (C integer division, truncating toward zero).
//
// Does nothing, silently, when fewer than two elements are present. On a
// zero divisor prints "Division by zero" and restores both operands.
void do_div(Word* w) {
    (void)w;
    if (sp < 1) return;
    int32_t b = data_pop();
    int32_t a = data_pop();
    if (b == 0) {
        printf("Division by zero\n");
        data_push(a);
        data_push(b);
        return;
    }
    if (b == -1) {
        // a / -1 is -a, but INT32_MIN / -1 overflows (undefined behaviour,
        // a hardware trap on x86). Negate in unsigned arithmetic so that
        // case wraps back to INT32_MIN instead.
        data_push((int32_t)(0u - (uint32_t)a));
        return;
    }
    data_push(a / b);
}
|
|
|
|
// . ( a -- ): pop the top of the data stack and print it in decimal followed
// by a space, flushing stdout. Does nothing, silently, on an empty stack.
void do_dot(Word* w) {
    (void)w;
    if (sp >= 0) {
        int32_t top = data_pop();
        printf("%d ", top);
        fflush(stdout);
    }
}
|
|
|
|
// cr ( -- ): write a newline to stdout and flush it.
void do_cr(Word* w) {
    (void)w;
    putchar('\n');
    fflush(stdout);
}
|
|
|
|
// exit: return from the current colon definition by restoring the
// instruction pointer saved on the return stack. A restored NULL stops the
// inner interpreter.
void do_exit(Word* w) {
    (void)w;
    ip = ret_pop();
}
|
|
|
|
// Code routine of every colon definition: enter the word's body.
// w is the colon definition being called.
void do_docolon(Word* w) {
    Cell* return_to = ip;   // where the caller resumes after `exit`
    ret_push(return_to);
    ip = w->body;           // continue execution at the first cell of the body
}
|
|
|
|
// lit: push the number stored in the cell that follows the `lit` reference
// in the running body, then step over that cell. The inner interpreter has
// already advanced ip past the `lit` reference itself, so ip points at the
// number on entry.
void do_lit(Word* w) {
    (void)w;
    Cell* literal = ip;
    data_push(literal->num);
    ip = literal + 1;
}
|
|
|
|
// : ( "name" -- ): start a new colon definition. Reads the next token as the
// word's name (truncated to MAX_NAME_LEN characters), empties the compile
// buffer and switches to compile mode. Prints "':' expects a name" and
// changes nothing when the line has no further token.
void do_colon(Word* w) {
    (void)w;
    char* token = next_token();
    if (token != NULL) {
        strncpy(compiling_name, token, MAX_NAME_LEN);
        compiling_name[MAX_NAME_LEN] = '\0';
        compile_idx = 0;  // Start from an empty compile buffer
        state = 1;        // Enter compile mode
    } else {
        printf("':' expects a name\n");
    }
}
|
|
|
|
// ; ( -- ): finish the colon definition in progress (immediate word).
//
// Copies the compiled cells plus a terminating `exit` reference into the
// dictionary body storage, creates the dictionary entry under the name
// recorded by ':', and returns to interpret mode.
//
// Outside compile mode it only prints an error. If the definition cannot be
// completed (`exit` not found, body storage or dictionary full) an error is
// printed and the definition is abandoned: compile mode is left and the
// compile buffer is emptied, so the interpreter never stays stuck in compile
// mode. No dictionary state is modified until every check has passed.
void do_semicolon(Word* w) {
    (void)w;
    if (state != 1) {
        printf("';' is only valid in compile mode\n");
        return;
    }

    // Whatever happens next, this definition is over.
    const int cell_count = compile_idx;
    state = 0;
    compile_idx = 0;

    Word* exit_w = lookup_word("exit");
    if (exit_w == NULL) {
        printf("Fatal: exit word not found\n");
        return;
    }

    // The body needs room for the compiled cells and the trailing `exit`.
    // The `exit` cell is written straight into body storage, so the compile
    // buffer itself needs no spare slot for it.
    if (body_idx + cell_count + 1 > BODY_SIZE) {
        printf("Dictionary body storage full\n");
        return;
    }

    // add_primitive reports "Dictionary full" itself and returns NULL.
    Word* new_w = add_primitive(compiling_name, do_docolon, 0);
    if (new_w == NULL) {
        return;
    }

    Cell* body = &dict_bodies[body_idx];
    memcpy(body, compile_buf, (size_t)cell_count * sizeof(Cell));
    body[cell_count].word = exit_w;

    new_w->body = body;
    body_idx += cell_count + 1;
}
|
|
|
|
// Interpreter functions
|
|
void inner_interpreter(void) {
|
|
while (ip != NULL) {
|
|
Cell current = *ip;
|
|
ip++; // Move to next cell
|
|
current.word->code(current.word);
|
|
}
|
|
}
|
|
|
|
void process_token(const char* token) {
|
|
Word* w = lookup_word(token);
|
|
if (w != NULL) {
|
|
if (state == 0) { // Interpret mode
|
|
if (w->code == do_docolon) { // Colon definition
|
|
ret_push(NULL); // Return address to stop interpreter
|
|
ip = w->body;
|
|
inner_interpreter();
|
|
} else { // Primitive word
|
|
w->code(w);
|
|
}
|
|
} else { // Compile mode
|
|
if (w->flags & (1 << 7)) { // Immediate word: execute now
|
|
if (w->code == do_docolon) {
|
|
ret_push(NULL);
|
|
ip = w->body;
|
|
inner_interpreter();
|
|
} else {
|
|
w->code(w);
|
|
}
|
|
} else { // Normal word: compile into current definition
|
|
if (compile_idx >= COMPILE_BUF_SIZE) {
|
|
printf("Compile buffer full\n");
|
|
return;
|
|
}
|
|
compile_buf[compile_idx++] = (Cell){.word = w};
|
|
}
|
|
}
|
|
} else { // Not a known word: try to parse as number
|
|
char* end;
|
|
long v = strtol(token, &end, 10);
|
|
if (end != token && *end == '\0') { // Valid integer
|
|
if (state == 0) { // Interpret mode: push number
|
|
data_push((int32_t)v);
|
|
} else { // Compile mode: compile lit + number
|
|
Word* lit_w = lookup_word("lit");
|
|
if (lit_w == NULL) {
|
|
printf("Fatal: lit word not found\n");
|
|
return;
|
|
}
|
|
if (compile_idx + 2 > COMPILE_BUF_SIZE) {
|
|
printf("Compile buffer full\n");
|
|
return;
|
|
}
|
|
compile_buf[compile_idx++] = (Cell){.word = lit_w};
|
|
compile_buf[compile_idx++] = (Cell){.num = (int32_t)v};
|
|
}
|
|
} else {
|
|
printf("Unknown word: '%s'\n", token);
|
|
}
|
|
}
|
|
}
|
|
|
|
void outer_interpreter(void) {
|
|
while (1) {
|
|
printf("ok ");
|
|
fflush(stdout);
|
|
if (fgets(input_buf, INPUT_BUF_SIZE, stdin) == NULL) {
|
|
break; // EOF
|
|
}
|
|
input_ptr = input_buf;
|
|
char* tok;
|
|
while ((tok = next_token()) != NULL) {
|
|
process_token(tok);
|
|
}
|
|
}
|
|
printf("\n");
|
|
}
|
|
|
|
int main(void) {
|
|
// Register primitive words
|
|
// Hidden words first
|
|
add_primitive("exit", do_exit, 0);
|
|
add_primitive("docolon", do_docolon, 1 << 6); // Hidden
|
|
add_primitive("lit", do_lit, 1 << 6); // Hidden
|
|
|
|
// Public primitives
|
|
add_primitive("dup", do_dup, 0);
|
|
add_primitive("drop", do_drop, 0);
|
|
add_primitive("swap", do_swap, 0);
|
|
add_primitive("over", do_over, 0);
|
|
add_primitive("+", do_add, 0);
|
|
add_primitive("-", do_sub, 0);
|
|
add_primitive("*", do_mul, 0);
|
|
add_primitive("/", do_div, 0);
|
|
add_primitive(".", do_dot, 0);
|
|
add_primitive("cr", do_cr, 0);
|
|
|
|
// Compilation words
|
|
add_primitive(":", do_colon, 0);
|
|
add_primitive(";", do_semicolon, 1 << 7); // Immediate word
|
|
|
|
// Start outer interpreter
|
|
outer_interpreter();
|
|
return 0;
|
|
}
|