From c548f17f1e9d5377cfda9360577b4070aed80b41 Mon Sep 17 00:00:00 2001 From: Oleksandr Kozachuk Date: Sun, 29 Mar 2026 22:48:37 +0200 Subject: [PATCH] Implement core Forth runtime: dictionary, codegen, outer interpreter, REPL - Dictionary: linked-list word headers in simulated linear memory with create/find/reveal, case-insensitive lookup, IMMEDIATE flag support - WASM codegen: IR-to-WASM translation via wasm-encoder with full validation; all stack, arithmetic, comparison, logic, memory, control flow, and return stack operations; wasmtime execution tests - Outer interpreter: tokenizer, number parsing (decimal/$hex/#dec/%bin), interpret/compile dispatch, control structures (IF/ELSE/THEN, BEGIN/UNTIL, BEGIN/WHILE/REPEAT), RECURSE, comments, string output - 40+ primitive words registered via JIT-compiled WASM modules linked to shared memory/globals/table - Interactive REPL with rustyline, piped input, and file execution - 145 tests passing across dictionary, codegen, and runtime --- crates/cli/src/main.rs | 91 ++- crates/core/Cargo.toml | 2 +- crates/core/src/codegen.rs | 1400 ++++++++++++++++++++++++++++++++- crates/core/src/dictionary.rs | 4 +- crates/core/src/outer.rs | 1334 ++++++++++++++++++++++++++++++- 5 files changed, 2798 insertions(+), 33 deletions(-) diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index 0104c31..94b4639 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -1,6 +1,7 @@ //! WAFER CLI: Interactive REPL and AOT compiler for WAFER Forth. 
use clap::Parser;
+use wafer_core::outer::ForthVM;

 /// WAFER: WebAssembly Forth Engine in Rust
 #[derive(Parser, Debug)]
@@ -21,21 +22,91 @@ struct Cli {
 fn main() -> anyhow::Result<()> {
     let cli = Cli::parse();

+    let mut vm = ForthVM::new()?;
+
     match cli.file {
-        Some(ref _file) => {
-            // TODO: Step 9 - Load and execute Forth file
-            eprintln!("WAFER: file execution not yet implemented");
+        Some(ref file) => {
+            // File mode: evaluate the whole file in one go.
+            let source = std::fs::read_to_string(file)?;
+            let result = vm.evaluate(&source);
+            // Show whatever the program printed before a failure, and only
+            // then propagate the error to the caller.
+            print_output(&mut vm);
+            result?;
         }
         None => {
-            // TODO: Step 9 - Interactive REPL
-            println!(
-                "WAFER v{} - WebAssembly Forth Engine in Rust",
-                env!("CARGO_PKG_VERSION")
-            );
-            println!("Type BYE to exit.");
-            eprintln!("REPL not yet implemented");
+            // Check if stdin is a pipe (not a TTY)
+            if !atty_is_tty() {
+                // Non-interactive: read all of stdin and evaluate
+                let mut input = String::new();
+                std::io::Read::read_to_string(&mut std::io::stdin(), &mut input)?;
+                // Evaluate line-by-line to handle multi-line input
+                for line in input.lines() {
+                    let result = vm.evaluate(line);
+                    // Drain the output buffer on success *and* failure so
+                    // text is never lost or shown after a later line.
+                    print_output(&mut vm);
+                    if let Err(e) = result {
+                        // Best effort: report and continue with the next line.
+                        eprintln!("Error: {e}");
+                    }
+                }
+            } else {
+                // Interactive REPL
+                println!(
+                    "WAFER v{} - WebAssembly Forth Engine in Rust",
+                    env!("CARGO_PKG_VERSION")
+                );
+                println!("Type BYE to exit.");
+
+                let mut rl = rustyline::DefaultEditor::new()?;
+                loop {
+                    // A different prompt signals an unfinished definition.
+                    let prompt = if vm.is_compiling() { " ] " } else { "> " };
+                    match rl.readline(prompt) {
+                        Ok(line) => {
+                            let trimmed = line.trim();
+                            if trimmed.eq_ignore_ascii_case("BYE") {
+                                break;
+                            }
+                            // History failures are not worth aborting for.
+                            let _ = rl.add_history_entry(&line);
+                            let result = vm.evaluate(&line);
+                            // Print buffered output before " ok" or the
+                            // error message, whichever follows.
+                            print_output(&mut vm);
+                            match result {
+                                Ok(()) => {
+                                    if !vm.is_compiling() {
+                                        println!(" ok");
+                                    }
+                                }
+                                Err(e) => {
+                                    eprintln!("Error: {e}");
+                                }
+                            }
+                        }
+                        Err(
+                            rustyline::error::ReadlineError::Interrupted
+                            | rustyline::error::ReadlineError::Eof,
+                        ) => {
+                            // Ctrl-C / Ctrl-D leave the REPL.
+                            break;
+                        }
+                        Err(e) => {
+                            eprintln!("Readline error: {e}");
+                            break;
+                        }
+                    }
+                }
+            }
         }
     }

     Ok(())
 }
+
+/// Check if stdin is a terminal (TTY).
+fn atty_is_tty() -> bool {
+    use std::io::IsTerminal;
+    std::io::stdin().is_terminal()
+}
+
+/// Print any output the VM has buffered and flush stdout.
+///
+/// Called after every `evaluate`, whether it succeeded or failed, so that
+/// program output always appears before the following prompt, " ok" marker
+/// or error message.
+fn print_output(vm: &mut ForthVM) {
+    let output = vm.take_output();
+    if !output.is_empty() {
+        print!("{output}");
+        // `print!` does not flush a partial line; a failed flush is ignored
+        // because there is nothing useful to do about it here.
+        let _ = std::io::Write::flush(&mut std::io::stdout());
+    }
+}
diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml
index a51589b..f339d32 100644
--- a/crates/core/Cargo.toml
+++ b/crates/core/Cargo.toml
@@ -8,10 +8,10 @@ license.workspace = true
 [dependencies]
 wasm-encoder = { workspace = true }
 wasmparser = { workspace = true }
+wasmtime = { workspace = true }
 anyhow = { workspace = true }
 thiserror = { workspace = true }

 [dev-dependencies]
 proptest = { workspace = true }
 insta = { workspace = true }
-wasmtime = { workspace = true }
diff --git a/crates/core/src/codegen.rs b/crates/core/src/codegen.rs
index f53a4db..94b7ab0 100644
--- a/crates/core/src/codegen.rs
+++ b/crates/core/src/codegen.rs
@@ -1,21 +1,1399 @@
 //! WASM code generation from IR.
 //!
 //! Translates optimized IR into WASM bytecode using the `wasm-encoder` crate.
-//! Supports two modes:
-//! - **Typed mode**: when type inference succeeds, values stay in WASM locals
-//! - **Fallback mode**: load/store against stack pointer globals in linear memory
+//! Currently implements **fallback mode**: all stacks live in linear memory
+//! and are accessed via globals (`$dsp`, `$rsp`).
-// TODO: Step 5 - Full codegen implementation
-// - IR -> WASM function body translation
-// - Single-word module generation (JIT mode)
-// - Multi-word module generation (AOT/consolidation mode)
-// - Typed vs fallback mode selection
-// - Function table management
+use std::borrow::Cow;
+
+use wasm_encoder::{
+    BlockType, CodeSection, ConstExpr, ElementSection, Elements, EntityType, ExportKind,
+    ExportSection, Function, FunctionSection, GlobalType, ImportSection, Instruction, MemArg,
+    MemoryType, Module, RefType, TableType, TypeSection, ValType,
+};
+
+use crate::error::{WaferError, WaferResult};
+use crate::ir::IrOp;
+use crate::memory::CELL_SIZE;
+
+// ---------------------------------------------------------------------------
+// Import indices (order matters: imports numbered sequentially by kind)
+// ---------------------------------------------------------------------------
+//
+// These constants mirror the order in which `compile_word` declares its
+// imports and types; changing one side without the other silently retargets
+// every instruction that uses the index.
+
+/// Index of the imported memory.
+const MEMORY_INDEX: u32 = 0;
+
+/// Index of the `$dsp` global (data stack pointer).
+/// `compile_word` imports `dsp` before `rsp`, hence index 0.
+const DSP: u32 = 0;
+
+/// Index of the `$rsp` global (return stack pointer).
+/// Second imported global, hence index 1.
+const RSP: u32 = 1;
+
+/// Index of the imported function table.
+const TABLE: u32 = 0;
+
+// Type indices in the type section.
+// They follow the order in which `compile_word` adds the two function types.
+const TYPE_VOID: u32 = 0; // () -> ()
+const TYPE_I32: u32 = 1; // (i32) -> ()
+
+// The `emit` callback is the first (and only) imported function, so index 0.
+// The compiled word is the first (and only) defined function, so index 1.
+// (Imported functions are numbered before defined ones.)
+const EMIT_FUNC: u32 = 0;
+const WORD_FUNC: u32 = 1;
+
+/// Natural-alignment MemArg for 4-byte i32 operations.
+/// `align` holds the log2 of the alignment in bytes.
+const MEM4: MemArg = MemArg {
+    offset: 0,
+    align: 2, // 2^2 = 4
+    memory_index: MEMORY_INDEX,
+};
+
+/// MemArg for single-byte operations.
+const MEM1: MemArg = MemArg {
+    offset: 0,
+    align: 0, // 2^0 = 1
+    memory_index: MEMORY_INDEX,
+};
+
+// ---------------------------------------------------------------------------
+// Public types
+// ---------------------------------------------------------------------------
+
+/// Configuration for code generation.
+#[derive(Debug, Clone)]
+pub struct CodegenConfig {
+    /// Base function index (for the function table).
+    ///
+    /// `compile_word` uses it as the offset of the active element segment
+    /// that installs the word, and reports it back as the word's `fn_index`.
+    pub base_fn_index: u32,
+    /// Number of functions already in the table.
+    ///
+    /// `compile_word` declares it as the minimum size of the imported table.
+    pub table_size: u32,
+}
+
+/// Result of compiling a word to WASM.
+#[derive(Debug, Clone)]
+pub struct CompiledModule {
+    /// The WASM binary bytes.
+    pub bytes: Vec<u8>,
+    /// Function index in the table for this word.
+    pub fn_index: u32,
+}
+
+// ---------------------------------------------------------------------------
+// Instruction-level helpers (free functions that take &mut Function)
+// ---------------------------------------------------------------------------
+
+/// Decrement `$dsp` by CELL_SIZE.
+///
+/// The push helpers call this before storing, i.e. the data stack grows
+/// towards lower addresses.
+fn dsp_dec(f: &mut Function) {
+    f.instruction(&Instruction::GlobalGet(DSP))
+        .instruction(&Instruction::I32Const(CELL_SIZE as i32))
+        .instruction(&Instruction::I32Sub)
+        .instruction(&Instruction::GlobalSet(DSP));
+}
+
+/// Increment `$dsp` by CELL_SIZE.
+///
+/// Used on its own this drops the top cell without reading it.
+fn dsp_inc(f: &mut Function) {
+    f.instruction(&Instruction::GlobalGet(DSP))
+        .instruction(&Instruction::I32Const(CELL_SIZE as i32))
+        .instruction(&Instruction::I32Add)
+        .instruction(&Instruction::GlobalSet(DSP));
+}
+
+/// Push an i32 value that is already on the WASM operand stack onto the
+/// data stack in linear memory, using `tmp` as a scratch local.
+///
+/// Sequence: local.set tmp; dsp -= 4; mem[dsp] = local.get tmp
+fn push_via_local(f: &mut Function, tmp: u32) {
+    // Park the value first: the store needs the address below the value.
+    f.instruction(&Instruction::LocalSet(tmp));
+    dsp_dec(f);
+    for insn in [
+        Instruction::GlobalGet(DSP),
+        Instruction::LocalGet(tmp),
+        Instruction::I32Store(MEM4),
+    ] {
+        f.instruction(&insn);
+    }
+}
+
+/// Emit code that pushes the compile-time constant `value` onto the data
+/// stack.
+fn push_const(f: &mut Function, value: i32) {
+    dsp_dec(f);
+    for insn in [
+        Instruction::GlobalGet(DSP),
+        Instruction::I32Const(value),
+        Instruction::I32Store(MEM4),
+    ] {
+        f.instruction(&insn);
+    }
+}
+
+/// Emit code that moves the top data-stack cell onto the WASM operand stack
+/// and releases that cell.
+fn pop(f: &mut Function) {
+    // Same load as `peek`, followed by the pointer bump.
+    peek(f);
+    dsp_inc(f);
+}
+
+/// Emit code that pops the top data-stack cell into local `local`.
+fn pop_to(f: &mut Function, local: u32) {
+    pop(f);
+    f.instruction(&Instruction::LocalSet(local));
+}
+
+/// Emit code that loads the top data-stack cell onto the WASM operand stack
+/// while leaving `$dsp` untouched.
+fn peek(f: &mut Function) {
+    for insn in [Instruction::GlobalGet(DSP), Instruction::I32Load(MEM4)] {
+        f.instruction(&insn);
+    }
+}
+
+/// Emit code that moves the value on the WASM operand stack onto the return
+/// stack, with `tmp` as the scratch local.
+fn rpush_via_local(f: &mut Function, tmp: u32) {
+    f.instruction(&Instruction::LocalSet(tmp));
+    for insn in [
+        // rsp -= CELL_SIZE
+        Instruction::GlobalGet(RSP),
+        Instruction::I32Const(CELL_SIZE as i32),
+        Instruction::I32Sub,
+        Instruction::GlobalSet(RSP),
+        // mem[rsp] = value
+        Instruction::GlobalGet(RSP),
+        Instruction::LocalGet(tmp),
+        Instruction::I32Store(MEM4),
+    ] {
+        f.instruction(&insn);
+    }
+}
+
+/// Pop the return stack onto the WASM operand stack.
+fn rpop(f: &mut Function) {
+    // Load the top cell (exactly what `rpeek` emits) ...
+    rpeek(f);
+    // ... then release it: rsp += CELL_SIZE
+    for insn in [
+        Instruction::GlobalGet(RSP),
+        Instruction::I32Const(CELL_SIZE as i32),
+        Instruction::I32Add,
+        Instruction::GlobalSet(RSP),
+    ] {
+        f.instruction(&insn);
+    }
+}
+
+/// Emit code that loads the top return-stack cell onto the WASM operand
+/// stack while leaving `$rsp` untouched.
+fn rpeek(f: &mut Function) {
+    for insn in [Instruction::GlobalGet(RSP), Instruction::I32Load(MEM4)] {
+        f.instruction(&insn);
+    }
+}
+
+/// Turn the WASM boolean (0 or 1) on the operand stack into a Forth flag
+/// (0 or -1), with `tmp` as the scratch local.
+fn bool_to_forth_flag(f: &mut Function, tmp: u32) {
+    // Computes `0 - b`: 1 becomes -1, 0 stays 0.
+    for insn in [
+        Instruction::LocalSet(tmp),
+        Instruction::I32Const(0),
+        Instruction::LocalGet(tmp),
+        Instruction::I32Sub,
+    ] {
+        f.instruction(&insn);
+    }
+}
+
+// ---------------------------------------------------------------------------
+// IR emission
+// ---------------------------------------------------------------------------
+
+/// Emit every operation of `ops`, in order, into the function body `f`.
+fn emit_body(f: &mut Function, ops: &[IrOp]) {
+    ops.iter().for_each(|op| emit_op(f, op));
+}
+
+/// Emit a single IR operation.
+#[allow(clippy::too_many_lines)]
+fn emit_op(f: &mut Function, op: &IrOp) {
+    // Conventions used by every arm:
+    // * the Forth data/return stacks live in linear memory and are reached
+    //   through the `pop*` / `push*` / `rpop` / `rpush*` helpers;
+    // * WASM locals are plain scratch registers, free to clobber between
+    //   IR operations.
+    match op {
+        // -- Literals -------------------------------------------------------
+        IrOp::PushI32(n) => push_const(f, *n),
+        // NOTE: these literals currently emit no code at all.
+        IrOp::PushI64(_) | IrOp::PushF64(_) => { /* TODO: double / float stacks */ }
+
+        // -- Stack manipulation ---------------------------------------------
+        IrOp::Drop => dsp_inc(f),
+
+        IrOp::Dup => {
+            peek(f);
+            push_via_local(f, 0);
+        }
+
+        IrOp::Swap => {
+            // ( a b -- b a )
+            pop_to(f, 0); // b
+            pop_to(f, 1); // a
+            f.instruction(&Instruction::LocalGet(0));
+            push_via_local(f, 2);
+            f.instruction(&Instruction::LocalGet(1));
+            push_via_local(f, 2);
+        }
+
+        IrOp::Over => {
+            // ( a b -- a b a ) : read second item
+            f.instruction(&Instruction::GlobalGet(DSP))
+                .instruction(&Instruction::I32Const(CELL_SIZE as i32))
+                .instruction(&Instruction::I32Add)
+                .instruction(&Instruction::I32Load(MEM4));
+            push_via_local(f, 0);
+        }
+
+        IrOp::Rot => {
+            // ( a b c -- b c a )
+            pop_to(f, 0); // c
+            pop_to(f, 1); // b
+            pop_to(f, 2); // a
+            f.instruction(&Instruction::LocalGet(1));
+            push_via_local(f, 3);
+            f.instruction(&Instruction::LocalGet(0));
+            push_via_local(f, 3);
+            f.instruction(&Instruction::LocalGet(2));
+            push_via_local(f, 3);
+        }
+
+        IrOp::Nip => {
+            // ( a b -- b )
+            pop_to(f, 0); // b
+            dsp_inc(f); // drop a
+            f.instruction(&Instruction::LocalGet(0));
+            push_via_local(f, 1);
+        }
+
+        IrOp::Tuck => {
+            // ( a b -- b a b )
+            pop_to(f, 0); // b
+            pop_to(f, 1); // a
+            f.instruction(&Instruction::LocalGet(0));
+            push_via_local(f, 2);
+            f.instruction(&Instruction::LocalGet(1));
+            push_via_local(f, 2);
+            f.instruction(&Instruction::LocalGet(0));
+            push_via_local(f, 2);
+        }
+
+        // -- Arithmetic -----------------------------------------------------
+        IrOp::Add => emit_binary_commutative(f, &Instruction::I32Add),
+        IrOp::Mul => emit_binary_commutative(f, &Instruction::I32Mul),
+
+        IrOp::Sub => {
+            // ( a b -- a-b )
+            pop_to(f, 0); // b
+            pop_to(f, 1); // a
+            f.instruction(&Instruction::LocalGet(1))
+                .instruction(&Instruction::LocalGet(0))
+                .instruction(&Instruction::I32Sub);
+            push_via_local(f, 2);
+        }
+
+        IrOp::DivMod => {
+            // ( n1 n2 -- rem quot )
+            pop_to(f, 0); // n2
+            pop_to(f, 1); // n1
+            // Push remainder first (deeper)
+            f.instruction(&Instruction::LocalGet(1))
+                .instruction(&Instruction::LocalGet(0))
+                .instruction(&Instruction::I32RemS);
+            push_via_local(f, 2);
+            // Push quotient on top
+            f.instruction(&Instruction::LocalGet(1))
+                .instruction(&Instruction::LocalGet(0))
+                .instruction(&Instruction::I32DivS);
+            push_via_local(f, 2);
+        }
+
+        IrOp::Negate => {
+            pop_to(f, 0);
+            f.instruction(&Instruction::I32Const(0))
+                .instruction(&Instruction::LocalGet(0))
+                .instruction(&Instruction::I32Sub);
+            push_via_local(f, 1);
+        }
+
+        IrOp::Abs => {
+            pop_to(f, 0);
+            // if local0 < 0: local0 = 0 - local0
+            f.instruction(&Instruction::LocalGet(0))
+                .instruction(&Instruction::I32Const(0))
+                .instruction(&Instruction::I32LtS)
+                .instruction(&Instruction::If(BlockType::Empty))
+                .instruction(&Instruction::I32Const(0))
+                .instruction(&Instruction::LocalGet(0))
+                .instruction(&Instruction::I32Sub)
+                .instruction(&Instruction::LocalSet(0))
+                .instruction(&Instruction::End);
+            f.instruction(&Instruction::LocalGet(0));
+            push_via_local(f, 1);
+        }
+
+        // -- Comparison -----------------------------------------------------
+        IrOp::Eq => emit_cmp(f, &Instruction::I32Eq),
+        IrOp::NotEq => emit_cmp(f, &Instruction::I32Ne),
+        IrOp::Lt => emit_cmp(f, &Instruction::I32LtS),
+        IrOp::Gt => emit_cmp(f, &Instruction::I32GtS),
+        IrOp::LtUnsigned => emit_cmp(f, &Instruction::I32LtU),
+
+        IrOp::ZeroEq => {
+            pop(f);
+            f.instruction(&Instruction::I32Eqz);
+            bool_to_forth_flag(f, 0);
+            push_via_local(f, 1);
+        }
+
+        IrOp::ZeroLt => {
+            pop(f);
+            f.instruction(&Instruction::I32Const(0))
+                .instruction(&Instruction::I32LtS);
+            bool_to_forth_flag(f, 0);
+            push_via_local(f, 1);
+        }
+
+        // -- Logic ----------------------------------------------------------
+        IrOp::And => emit_binary_commutative(f, &Instruction::I32And),
+        IrOp::Or => emit_binary_commutative(f, &Instruction::I32Or),
+        IrOp::Xor => emit_binary_commutative(f, &Instruction::I32Xor),
+
+        IrOp::Invert => {
+            // Bitwise NOT expressed as x XOR -1.
+            pop(f);
+            f.instruction(&Instruction::I32Const(-1))
+                .instruction(&Instruction::I32Xor);
+            push_via_local(f, 0);
+        }
+
+        IrOp::Lshift => emit_binary_ordered(f, &Instruction::I32Shl),
+        // Forth RSHIFT is a *logical* shift (zero fill), so the unsigned
+        // WASM shift is required; `i32.shr_s` would sign-extend negatives.
+        IrOp::Rshift => emit_binary_ordered(f, &Instruction::I32ShrU),
+
+        // -- Memory ---------------------------------------------------------
+        IrOp::Fetch => {
+            // ( addr -- value )
+            pop(f);
+            f.instruction(&Instruction::I32Load(MEM4));
+            push_via_local(f, 0);
+        }
+
+        IrOp::Store => {
+            // ( x addr -- )
+            pop_to(f, 0); // addr
+            pop_to(f, 1); // x
+            f.instruction(&Instruction::LocalGet(0))
+                .instruction(&Instruction::LocalGet(1))
+                .instruction(&Instruction::I32Store(MEM4));
+        }
+
+        IrOp::CFetch => {
+            // ( c-addr -- char ), zero-extended byte load
+            pop(f);
+            f.instruction(&Instruction::I32Load8U(MEM1));
+            push_via_local(f, 0);
+        }
+
+        IrOp::CStore => {
+            // ( char c-addr -- )
+            pop_to(f, 0); // addr
+            pop_to(f, 1); // char
+            f.instruction(&Instruction::LocalGet(0))
+                .instruction(&Instruction::LocalGet(1))
+                .instruction(&Instruction::I32Store8(MEM1));
+        }
+
+        IrOp::PlusStore => {
+            // ( n addr -- ) : mem[addr] += n
+            pop_to(f, 0); // addr
+            pop_to(f, 1); // n
+            // addr is pushed twice: once for the store, once for the load.
+            f.instruction(&Instruction::LocalGet(0))
+                .instruction(&Instruction::LocalGet(0))
+                .instruction(&Instruction::I32Load(MEM4))
+                .instruction(&Instruction::LocalGet(1))
+                .instruction(&Instruction::I32Add)
+                .instruction(&Instruction::I32Store(MEM4));
+        }
+
+        // -- Control flow ---------------------------------------------------
+        IrOp::Call(word_id) => {
+            // The word id doubles as the index into the shared table.
+            f.instruction(&Instruction::I32Const(word_id.0 as i32))
+                .instruction(&Instruction::CallIndirect {
+                    type_index: TYPE_VOID,
+                    table_index: TABLE,
+                });
+        }
+
+        IrOp::TailCall(word_id) => {
+            // Emitted as an ordinary indirect call followed by `return`.
+            f.instruction(&Instruction::I32Const(word_id.0 as i32))
+                .instruction(&Instruction::CallIndirect {
+                    type_index: TYPE_VOID,
+                    table_index: TABLE,
+                })
+                .instruction(&Instruction::Return);
+        }
+
+        IrOp::If {
+            then_body,
+            else_body,
+        } => {
+            // The popped flag is the `if` condition: non-zero takes `then`.
+            pop(f);
+            f.instruction(&Instruction::If(BlockType::Empty));
+            emit_body(f, then_body);
+            if let Some(eb) = else_body {
+                f.instruction(&Instruction::Else);
+                emit_body(f, eb);
+            }
+            f.instruction(&Instruction::End);
+        }
+
+        IrOp::DoLoop { body, is_plus_loop } => {
+            emit_do_loop(f, body, *is_plus_loop);
+        }
+
+        IrOp::BeginUntil { body } => {
+            // Repeat while the flag left by `body` is zero.
+            f.instruction(&Instruction::Loop(BlockType::Empty));
+            emit_body(f, body);
+            pop(f);
+            f.instruction(&Instruction::I32Eqz)
+                .instruction(&Instruction::BrIf(0))
+                .instruction(&Instruction::End);
+        }
+
+        IrOp::BeginWhileRepeat { test, body } => {
+            f.instruction(&Instruction::Block(BlockType::Empty));
+            f.instruction(&Instruction::Loop(BlockType::Empty));
+            emit_body(f, test);
+            pop(f);
+            f.instruction(&Instruction::I32Eqz)
+                .instruction(&Instruction::BrIf(1)); // break to outer block
+            emit_body(f, body);
+            f.instruction(&Instruction::Br(0)) // continue loop
+                .instruction(&Instruction::End) // end loop
+                .instruction(&Instruction::End); // end block
+        }
+
+        IrOp::Exit => {
+            f.instruction(&Instruction::Return);
+        }
+
+        // -- Return stack ---------------------------------------------------
+        IrOp::ToR => {
+            pop(f);
+            rpush_via_local(f, 0);
+        }
+
+        IrOp::FromR => {
+            rpop(f);
+            push_via_local(f, 0);
+        }
+
+        IrOp::RFetch => {
+            rpeek(f);
+            push_via_local(f, 0);
+        }
+
+        // -- I/O ------------------------------------------------------------
+        IrOp::Emit => {
+            pop(f);
+            f.instruction(&Instruction::Call(EMIT_FUNC));
+        }
+
+        IrOp::Dot => {
+            // MVP stub: pop and discard
+            pop(f);
+            f.instruction(&Instruction::Drop);
+        }
+
+        IrOp::Cr => {
+            // 10 = ASCII line feed
+            f.instruction(&Instruction::I32Const(10))
+                .instruction(&Instruction::Call(EMIT_FUNC));
+        }
+
+        IrOp::Type => {
+            // MVP stub: drop both (c-addr u)
+            pop(f);
+            f.instruction(&Instruction::Drop);
+            pop(f);
+            f.instruction(&Instruction::Drop);
+        }
+
+        // -- System ---------------------------------------------------------
+        IrOp::Execute => {
+            // The popped cell is used directly as the table index.
+            pop(f);
+            f.instruction(&Instruction::CallIndirect {
+                type_index: TYPE_VOID,
+                table_index: TABLE,
+            });
+        }
+    }
+}
+
+/// Binary operation where operand order does not matter (commutative).
+/// Pops two from data stack, applies `op`, pushes result.
+///
+/// Emits exactly the same code as [`emit_binary_ordered`]; the separate name
+/// only documents intent at the call site.
+fn emit_binary_commutative(f: &mut Function, op: &Instruction<'_>) {
+    emit_binary_ordered(f, op);
+}
+
+/// Binary operation where operand order matters: ( a b -- a OP b ).
+/// First pops b, then a, pushes a OP b.
+///
+/// Uses locals 0 and 1 for the operands and local 2 as push scratch.
+fn emit_binary_ordered(f: &mut Function, op: &Instruction<'_>) {
+    pop_to(f, 0); // b
+    pop_to(f, 1); // a
+    f.instruction(&Instruction::LocalGet(1))
+        .instruction(&Instruction::LocalGet(0))
+        .instruction(op);
+    push_via_local(f, 2);
+}
+
+/// Comparison: pop two, compare, push Forth flag (-1 or 0).
+///
+/// Uses locals 0 and 1 for the operands, 2 for the flag conversion and 3 as
+/// push scratch, so the function needs at least four locals.
+fn emit_cmp(f: &mut Function, cmp: &Instruction<'_>) {
+    pop_to(f, 0); // b
+    pop_to(f, 1); // a
+    f.instruction(&Instruction::LocalGet(1))
+        .instruction(&Instruction::LocalGet(0))
+        .instruction(cmp);
+    bool_to_forth_flag(f, 2);
+    push_via_local(f, 3);
+}
+
+/// Emit a DO...LOOP / DO...+LOOP construct.
+fn emit_do_loop(f: &mut Function, body: &[IrOp], is_plus_loop: bool) {
+    // Loop parameters are kept on the return stack (limit below index)
+    // because `body` is free to clobber every scratch local. They are
+    // re-read after each iteration and removed once the loop is left.
+
+    // DO ( limit index -- )
+    pop_to(f, 0); // index
+    pop_to(f, 1); // limit
+
+    // Push limit then index to return stack
+    f.instruction(&Instruction::LocalGet(1));
+    rpush_via_local(f, 2);
+    f.instruction(&Instruction::LocalGet(0));
+    rpush_via_local(f, 2);
+
+    // block $exit
+    //   loop $continue
+    //     (loop body)
+    //     -- update index, check, branch
+    //   end
+    // end
+    f.instruction(&Instruction::Block(BlockType::Empty));
+    f.instruction(&Instruction::Loop(BlockType::Empty));
+
+    emit_body(f, body);
+
+    // Pop current index from return stack
+    rpop(f);
+    if is_plus_loop {
+        f.instruction(&Instruction::LocalSet(0));
+        pop_to(f, 2); // increment from data stack (kept in local 2)
+        f.instruction(&Instruction::LocalGet(0))
+            .instruction(&Instruction::LocalGet(2))
+            .instruction(&Instruction::I32Add)
+            .instruction(&Instruction::LocalSet(0));
+    } else {
+        f.instruction(&Instruction::I32Const(1))
+            .instruction(&Instruction::I32Add)
+            .instruction(&Instruction::LocalSet(0));
+    }
+
+    // Peek limit from return stack
+    rpeek(f);
+    f.instruction(&Instruction::LocalSet(1));
+
+    // Push updated index back to return stack. Local 3 is the scratch here
+    // so that the +LOOP increment in local 2 survives until the exit test.
+    f.instruction(&Instruction::LocalGet(0));
+    rpush_via_local(f, 3);
+
+    if is_plus_loop {
+        // The direction of travel decides which side of the limit ends the
+        // loop: counting up exits on `index >= limit`, counting down (a
+        // negative increment) exits on `index < limit`.
+        // `select` yields its first operand when the condition is non-zero.
+        f.instruction(&Instruction::LocalGet(0))
+            .instruction(&Instruction::LocalGet(1))
+            .instruction(&Instruction::I32LtS) // exit test when counting down
+            .instruction(&Instruction::LocalGet(0))
+            .instruction(&Instruction::LocalGet(1))
+            .instruction(&Instruction::I32GeS) // exit test when counting up
+            .instruction(&Instruction::LocalGet(2))
+            .instruction(&Instruction::I32Const(0))
+            .instruction(&Instruction::I32LtS) // increment < 0 ?
+            .instruction(&Instruction::Select);
+    } else {
+        // if index >= limit, exit
+        f.instruction(&Instruction::LocalGet(0))
+            .instruction(&Instruction::LocalGet(1))
+            .instruction(&Instruction::I32GeS);
+    }
+    f.instruction(&Instruction::BrIf(1)) // break to $exit (block, depth 1)
+        .instruction(&Instruction::Br(0)) // continue $continue (loop, depth 0)
+        .instruction(&Instruction::End) // end loop
+        .instruction(&Instruction::End); // end block
+
+    // Clean up: pop index and limit from return stack
+    rpop(f);
+    f.instruction(&Instruction::Drop);
+    rpop(f);
+    f.instruction(&Instruction::Drop);
+}
+
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+
+/// Estimate how many scratch
locals a function body needs.
+///
+/// Walks nested control-flow bodies recursively. With the current set of
+/// operations nothing asks for more than the baseline, so the result is
+/// always 4 (locals 0..=3, the highest index used by the emit helpers).
+fn count_needed_locals(ops: &[IrOp]) -> u32 {
+    let mut max: u32 = 4; // baseline scratch space
+    for op in ops {
+        match op {
+            IrOp::Rot | IrOp::Tuck => max = max.max(4),
+            IrOp::DoLoop { body, .. } => max = max.max(count_needed_locals(body)),
+            IrOp::BeginUntil { body } => max = max.max(count_needed_locals(body)),
+            IrOp::BeginWhileRepeat { test, body } => {
+                max = max
+                    .max(count_needed_locals(test))
+                    .max(count_needed_locals(body));
+            }
+            IrOp::If {
+                then_body,
+                else_body,
+            } => {
+                max = max.max(count_needed_locals(then_body));
+                if let Some(eb) = else_body {
+                    max = max.max(count_needed_locals(eb));
+                }
+            }
+            _ => {}
+        }
+    }
+    max
+}
+
+/// Generate a complete WASM module for a single compiled word.
+///
+/// This is the JIT path: each word gets its own module that imports
+/// shared memory, globals, and function table from the host.
+///
+/// The module imports, in this order, `env.emit`, `env.memory`, `env.dsp`,
+/// `env.rsp` and `env.table`; exports the word as `fn`; and installs it in
+/// the table at `config.base_fn_index` through an active element segment.
+/// `_name` is currently unused.
+///
+/// # Errors
+///
+/// Returns `WaferError::ValidationError` when the generated bytes do not
+/// pass `wasmparser` validation.
+pub fn compile_word(
+    _name: &str,
+    body: &[IrOp],
+    config: &CodegenConfig,
+) -> WaferResult<CompiledModule> {
+    let mut module = Module::new();
+
+    // -- Type section --
+    // Order defines TYPE_VOID (0) and TYPE_I32 (1).
+    let mut types = TypeSection::new();
+    types.ty().function([], []); // type 0: () -> ()
+    types.ty().function([ValType::I32], []); // type 1: (i32) -> ()
+    module.section(&types);
+
+    // -- Import section --
+    // Order defines EMIT_FUNC, MEMORY_INDEX, DSP, RSP and TABLE.
+    let mut imports = ImportSection::new();
+    imports.import("env", "emit", EntityType::Function(TYPE_I32));
+    imports.import(
+        "env",
+        "memory",
+        EntityType::Memory(MemoryType {
+            minimum: 1,
+            maximum: None,
+            memory64: false,
+            shared: false,
+            page_size_log2: None,
+        }),
+    );
+    imports.import(
+        "env",
+        "dsp",
+        EntityType::Global(GlobalType {
+            val_type: ValType::I32,
+            mutable: true,
+            shared: false,
+        }),
+    );
+    imports.import(
+        "env",
+        "rsp",
+        EntityType::Global(GlobalType {
+            val_type: ValType::I32,
+            mutable: true,
+            shared: false,
+        }),
+    );
+    imports.import(
+        "env",
+        "table",
+        EntityType::Table(TableType {
+            element_type: RefType::FUNCREF,
+            minimum: config.table_size as u64,
+            maximum: None,
+            table64: false,
+            shared: false,
+        }),
+    );
+    module.section(&imports);
+
+    // -- Function section --
+    let mut functions = FunctionSection::new();
+    functions.function(TYPE_VOID);
+    module.section(&functions);
+
+    // -- Export section --
+    let mut exports = ExportSection::new();
+    exports.export("fn", ExportKind::Func, WORD_FUNC);
+    module.section(&exports);
+
+    // -- Element section --
+    // Active segment: instantiation writes the word into the shared table.
+    let mut elements = ElementSection::new();
+    let offset = ConstExpr::i32_const(config.base_fn_index as i32);
+    let indices = [WORD_FUNC];
+    elements.active(
+        Some(TABLE),
+        &offset,
+        Elements::Functions(Cow::Borrowed(&indices)),
+    );
+    module.section(&elements);
+
+    // -- Code section --
+    let num_locals = count_needed_locals(body);
+    let mut func = Function::new(vec![(num_locals, ValType::I32)]);
+    emit_body(&mut func, body);
+    func.instruction(&Instruction::End);
+
+    let mut code = CodeSection::new();
+    code.function(&func);
+    module.section(&code);
+
+    let bytes = module.finish();
+
+    // Validate
+    wasmparser::validate(&bytes).map_err(|e| {
+        WaferError::ValidationError(format!("Generated WASM failed validation: {e}"))
+    })?;
+
+    Ok(CompiledModule {
+        bytes,
+        fn_index: config.base_fn_index,
+    })
+}
+
+/// Generate the core/bootstrap WASM module.
+///
+/// Not yet implemented -- will be built in a future step.
+pub fn compile_core_module(primitives: &[(String, Vec)]) -> WaferResult> { + let _ = primitives; + Err(WaferError::CodegenError( + "compile_core_module not yet implemented".to_string(), + )) +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- #[cfg(test)] mod tests { + use super::*; + use crate::dictionary::WordId; + use crate::ir::IrOp; + use crate::memory::{DATA_STACK_TOP, RETURN_STACK_TOP}; + + fn default_config() -> CodegenConfig { + CodegenConfig { + base_fn_index: 0, + table_size: 16, + } + } + + fn validate_wasm(bytes: &[u8]) -> Result<(), String> { + wasmparser::validate(bytes) + .map(|_| ()) + .map_err(|e| e.to_string()) + } + + // =================================================================== + // Validation-only tests + // =================================================================== + #[test] - fn placeholder() { - // Codegen tests will be added in Step 5 + fn compile_push_i32_validates() { + let m = compile_word("test", &[IrOp::PushI32(42)], &default_config()).unwrap(); + validate_wasm(&m.bytes).unwrap(); + } + + #[test] + fn compile_arithmetic_validates() { + let ops = vec![IrOp::PushI32(3), IrOp::PushI32(4), IrOp::Add]; + let m = compile_word("add_test", &ops, &default_config()).unwrap(); + validate_wasm(&m.bytes).unwrap(); + } + + #[test] + fn compile_if_else_validates() { + let ops = vec![ + IrOp::PushI32(1), + IrOp::If { + then_body: vec![IrOp::PushI32(42)], + else_body: Some(vec![IrOp::PushI32(0)]), + }, + ]; + let m = compile_word("if_test", &ops, &default_config()).unwrap(); + validate_wasm(&m.bytes).unwrap(); + } + + #[test] + fn compile_call_validates() { + let ops = vec![IrOp::Call(WordId(5))]; + let m = compile_word("call_test", &ops, &default_config()).unwrap(); + validate_wasm(&m.bytes).unwrap(); + } + + #[test] + fn compile_stack_ops_validates() { + let ops = vec![ + IrOp::PushI32(1), + IrOp::PushI32(2), 
+ IrOp::Dup, + IrOp::Swap, + IrOp::Over, + IrOp::Rot, + IrOp::Drop, + IrOp::Drop, + IrOp::Drop, + ]; + let m = compile_word("stack_ops", &ops, &default_config()).unwrap(); + validate_wasm(&m.bytes).unwrap(); + } + + #[test] + fn compile_comparisons_validate() { + for op in [IrOp::Eq, IrOp::NotEq, IrOp::Lt, IrOp::Gt, IrOp::LtUnsigned] { + let ops = vec![IrOp::PushI32(3), IrOp::PushI32(4), op]; + compile_word("cmp", &ops, &default_config()).unwrap(); + } + for op in [IrOp::ZeroEq, IrOp::ZeroLt] { + let ops = vec![IrOp::PushI32(0), op]; + compile_word("zcmp", &ops, &default_config()).unwrap(); + } + } + + #[test] + fn compile_logic_ops_validates() { + let ops = vec![ + IrOp::PushI32(0xFF), + IrOp::PushI32(0x0F), + IrOp::And, + IrOp::PushI32(0xF0), + IrOp::Or, + IrOp::Invert, + ]; + compile_word("logic", &ops, &default_config()).unwrap(); + } + + #[test] + fn compile_memory_ops_validates() { + let ops = vec![ + IrOp::PushI32(42), + IrOp::PushI32(0x100), + IrOp::Store, + IrOp::PushI32(0x100), + IrOp::Fetch, + ]; + compile_word("mem", &ops, &default_config()).unwrap(); + } + + #[test] + fn compile_begin_until_validates() { + let ops = vec![ + IrOp::PushI32(5), + IrOp::BeginUntil { + body: vec![IrOp::PushI32(1), IrOp::Sub, IrOp::Dup, IrOp::ZeroEq], + }, + ]; + compile_word("bu", &ops, &default_config()).unwrap(); + } + + #[test] + fn compile_begin_while_repeat_validates() { + let ops = vec![ + IrOp::PushI32(3), + IrOp::BeginWhileRepeat { + test: vec![IrOp::Dup], + body: vec![IrOp::PushI32(1), IrOp::Sub], + }, + ]; + compile_word("bwr", &ops, &default_config()).unwrap(); + } + + #[test] + fn compile_return_stack_validates() { + let ops = vec![IrOp::PushI32(42), IrOp::ToR, IrOp::RFetch, IrOp::FromR]; + compile_word("rs", &ops, &default_config()).unwrap(); + } + + #[test] + fn compile_shift_ops_validates() { + let ops = vec![ + IrOp::PushI32(1), + IrOp::PushI32(4), + IrOp::Lshift, + IrOp::PushI32(2), + IrOp::Rshift, + ]; + compile_word("shift", &ops, 
&default_config()).unwrap(); + } + + #[test] + fn compile_emit_validates() { + compile_word("emit", &[IrOp::PushI32(65), IrOp::Emit], &default_config()).unwrap(); + } + + #[test] + fn compile_cr_validates() { + compile_word("cr", &[IrOp::Cr], &default_config()).unwrap(); + } + + #[test] + fn compile_exit_validates() { + compile_word("exit", &[IrOp::PushI32(1), IrOp::Exit], &default_config()).unwrap(); + } + + #[test] + fn compile_nip_tuck_validates() { + let ops = vec![ + IrOp::PushI32(1), + IrOp::PushI32(2), + IrOp::Nip, + IrOp::PushI32(3), + IrOp::Tuck, + ]; + compile_word("nt", &ops, &default_config()).unwrap(); + } + + #[test] + fn compile_divmod_validates() { + compile_word( + "dm", + &[IrOp::PushI32(10), IrOp::PushI32(3), IrOp::DivMod], + &default_config(), + ) + .unwrap(); + } + + #[test] + fn compile_negate_abs_validates() { + compile_word( + "na", + &[IrOp::PushI32(-5), IrOp::Abs, IrOp::Negate], + &default_config(), + ) + .unwrap(); + } + + #[test] + fn compile_empty_body_validates() { + compile_word("noop", &[], &default_config()).unwrap(); + } + + #[test] + fn compile_cfetch_cstore_validates() { + let ops = vec![ + IrOp::PushI32(65), + IrOp::PushI32(0x200), + IrOp::CStore, + IrOp::PushI32(0x200), + IrOp::CFetch, + ]; + compile_word("byte", &ops, &default_config()).unwrap(); + } + + #[test] + fn compile_plus_store_validates() { + let ops = vec![ + IrOp::PushI32(10), + IrOp::PushI32(0x100), + IrOp::Store, + IrOp::PushI32(5), + IrOp::PushI32(0x100), + IrOp::PlusStore, + ]; + compile_word("ps", &ops, &default_config()).unwrap(); + } + + #[test] + fn compiled_module_fn_index() { + let cfg = CodegenConfig { + base_fn_index: 7, + table_size: 16, + }; + let m = compile_word("t", &[IrOp::PushI32(1)], &cfg).unwrap(); + assert_eq!(m.fn_index, 7); + } + + // =================================================================== + // Wasmtime execution tests + // =================================================================== + + /// Run a compiled word via wasmtime 
and return the data stack (top first). + fn run_word(ops: &[IrOp]) -> Vec { + use wasmtime::*; + + let compiled = compile_word("test", ops, &default_config()).unwrap(); + let engine = Engine::default(); + let mut store = Store::new(&engine, ()); + + let memory = Memory::new(&mut store, MemoryType::new(16, None)).unwrap(); + + let dsp = Global::new( + &mut store, + wasmtime::GlobalType::new(ValType::I32, Mutability::Var), + Val::I32(DATA_STACK_TOP as i32), + ) + .unwrap(); + + let rsp = Global::new( + &mut store, + wasmtime::GlobalType::new(ValType::I32, Mutability::Var), + Val::I32(RETURN_STACK_TOP as i32), + ) + .unwrap(); + + let table = Table::new( + &mut store, + wasmtime::TableType::new(RefType::FUNCREF, 16, None), + Ref::Func(None), + ) + .unwrap(); + + let emit_ty = FuncType::new(&engine, [ValType::I32], []); + let emit = Func::new(&mut store, emit_ty, |_caller, _params, _results| Ok(())); + + let module = wasmtime::Module::new(&engine, &compiled.bytes).unwrap(); + let instance = Instance::new( + &mut store, + &module, + &[ + emit.into(), + memory.into(), + dsp.into(), + rsp.into(), + table.into(), + ], + ) + .unwrap(); + + instance + .get_func(&mut store, "fn") + .unwrap() + .call(&mut store, &[], &mut []) + .unwrap(); + + // Read data stack + let sp = dsp.get(&mut store).unwrap_i32() as u32; + let data = memory.data(&store); + let mut stack = Vec::new(); + let mut addr = sp; + while addr < DATA_STACK_TOP { + let b: [u8; 4] = data[addr as usize..addr as usize + 4].try_into().unwrap(); + stack.push(i32::from_le_bytes(b)); + addr += CELL_SIZE; + } + stack + } + + #[test] + fn execute_push_i32() { + assert_eq!(run_word(&[IrOp::PushI32(42)]), vec![42]); + } + + #[test] + fn execute_push_multiple() { + assert_eq!( + run_word(&[IrOp::PushI32(1), IrOp::PushI32(2), IrOp::PushI32(3)]), + vec![3, 2, 1], + ); + } + + #[test] + fn execute_add() { + assert_eq!( + run_word(&[IrOp::PushI32(3), IrOp::PushI32(4), IrOp::Add]), + vec![7] + ); + } + + #[test] + fn 
execute_sub() { + assert_eq!( + run_word(&[IrOp::PushI32(10), IrOp::PushI32(3), IrOp::Sub]), + vec![7] + ); + } + + #[test] + fn execute_mul() { + assert_eq!( + run_word(&[IrOp::PushI32(6), IrOp::PushI32(7), IrOp::Mul]), + vec![42] + ); + } + + #[test] + fn execute_divmod() { + // ( 10 3 -- rem quot ) => ( 1 3 ) => top-first: [3, 1] + assert_eq!( + run_word(&[IrOp::PushI32(10), IrOp::PushI32(3), IrOp::DivMod]), + vec![3, 1] + ); + } + + #[test] + fn execute_dup() { + assert_eq!(run_word(&[IrOp::PushI32(42), IrOp::Dup]), vec![42, 42]); + } + + #[test] + fn execute_drop() { + assert_eq!( + run_word(&[IrOp::PushI32(1), IrOp::PushI32(2), IrOp::Drop]), + vec![1] + ); + } + + #[test] + fn execute_swap() { + // ( 1 2 -- 2 1 ) => top-first: [1, 2] + assert_eq!( + run_word(&[IrOp::PushI32(1), IrOp::PushI32(2), IrOp::Swap]), + vec![1, 2] + ); + } + + #[test] + fn execute_over() { + // ( 1 2 -- 1 2 1 ) + assert_eq!( + run_word(&[IrOp::PushI32(1), IrOp::PushI32(2), IrOp::Over]), + vec![1, 2, 1] + ); + } + + #[test] + fn execute_rot() { + // ( 1 2 3 -- 2 3 1 ) => top-first: [1, 3, 2] + assert_eq!( + run_word(&[ + IrOp::PushI32(1), + IrOp::PushI32(2), + IrOp::PushI32(3), + IrOp::Rot + ]), + vec![1, 3, 2], + ); + } + + #[test] + fn execute_negate() { + assert_eq!(run_word(&[IrOp::PushI32(5), IrOp::Negate]), vec![-5]); + } + + #[test] + fn execute_abs() { + assert_eq!(run_word(&[IrOp::PushI32(-42), IrOp::Abs]), vec![42]); + assert_eq!(run_word(&[IrOp::PushI32(42), IrOp::Abs]), vec![42]); + } + + #[test] + fn execute_eq() { + assert_eq!( + run_word(&[IrOp::PushI32(5), IrOp::PushI32(5), IrOp::Eq]), + vec![-1] + ); + assert_eq!( + run_word(&[IrOp::PushI32(3), IrOp::PushI32(5), IrOp::Eq]), + vec![0] + ); + } + + #[test] + fn execute_lt() { + assert_eq!( + run_word(&[IrOp::PushI32(3), IrOp::PushI32(5), IrOp::Lt]), + vec![-1] + ); + assert_eq!( + run_word(&[IrOp::PushI32(5), IrOp::PushI32(3), IrOp::Lt]), + vec![0] + ); + } + + #[test] + fn execute_gt() { + assert_eq!( + 
run_word(&[IrOp::PushI32(5), IrOp::PushI32(3), IrOp::Gt]), + vec![-1] + ); + } + + #[test] + fn execute_zero_eq() { + assert_eq!(run_word(&[IrOp::PushI32(0), IrOp::ZeroEq]), vec![-1]); + assert_eq!(run_word(&[IrOp::PushI32(5), IrOp::ZeroEq]), vec![0]); + } + + #[test] + fn execute_zero_lt() { + assert_eq!(run_word(&[IrOp::PushI32(-1), IrOp::ZeroLt]), vec![-1]); + assert_eq!(run_word(&[IrOp::PushI32(0), IrOp::ZeroLt]), vec![0]); + } + + #[test] + fn execute_and_or_xor() { + assert_eq!( + run_word(&[IrOp::PushI32(0xFF), IrOp::PushI32(0x0F), IrOp::And]), + vec![0x0F] + ); + assert_eq!( + run_word(&[IrOp::PushI32(0xF0), IrOp::PushI32(0x0F), IrOp::Or]), + vec![0xFF] + ); + assert_eq!( + run_word(&[IrOp::PushI32(0xFF), IrOp::PushI32(0xF0), IrOp::Xor]), + vec![0x0F] + ); + } + + #[test] + fn execute_invert() { + assert_eq!(run_word(&[IrOp::PushI32(0), IrOp::Invert]), vec![-1]); + } + + #[test] + fn execute_shifts() { + assert_eq!( + run_word(&[IrOp::PushI32(1), IrOp::PushI32(4), IrOp::Lshift]), + vec![16] + ); + assert_eq!( + run_word(&[IrOp::PushI32(16), IrOp::PushI32(2), IrOp::Rshift]), + vec![4] + ); + } + + #[test] + fn execute_fetch_store() { + let ops = vec![ + IrOp::PushI32(42), + IrOp::PushI32(0x100), + IrOp::Store, + IrOp::PushI32(0x100), + IrOp::Fetch, + ]; + assert_eq!(run_word(&ops), vec![42]); + } + + #[test] + fn execute_cfetch_cstore() { + let ops = vec![ + IrOp::PushI32(65), + IrOp::PushI32(0x200), + IrOp::CStore, + IrOp::PushI32(0x200), + IrOp::CFetch, + ]; + assert_eq!(run_word(&ops), vec![65]); + } + + #[test] + fn execute_if_then_else() { + // TRUE path + let ops = vec![ + IrOp::PushI32(-1), + IrOp::If { + then_body: vec![IrOp::PushI32(42)], + else_body: Some(vec![IrOp::PushI32(0)]), + }, + ]; + assert_eq!(run_word(&ops), vec![42]); + + // FALSE path + let ops = vec![ + IrOp::PushI32(0), + IrOp::If { + then_body: vec![IrOp::PushI32(42)], + else_body: Some(vec![IrOp::PushI32(0)]), + }, + ]; + assert_eq!(run_word(&ops), vec![0]); + } + + #[test] + fn 
execute_if_without_else() { + let ops = vec![ + IrOp::PushI32(99), + IrOp::PushI32(-1), + IrOp::If { + then_body: vec![IrOp::PushI32(42)], + else_body: None, + }, + ]; + assert_eq!(run_word(&ops), vec![42, 99]); + + let ops = vec![ + IrOp::PushI32(99), + IrOp::PushI32(0), + IrOp::If { + then_body: vec![IrOp::PushI32(42)], + else_body: None, + }, + ]; + assert_eq!(run_word(&ops), vec![99]); + } + + #[test] + fn execute_nested_if() { + let ops = vec![ + IrOp::PushI32(-1), + IrOp::If { + then_body: vec![ + IrOp::PushI32(-1), + IrOp::If { + then_body: vec![IrOp::PushI32(1)], + else_body: Some(vec![IrOp::PushI32(2)]), + }, + ], + else_body: Some(vec![IrOp::PushI32(3)]), + }, + ]; + assert_eq!(run_word(&ops), vec![1]); + } + + #[test] + fn execute_begin_until() { + // Count down from 3 + let ops = vec![ + IrOp::PushI32(3), + IrOp::BeginUntil { + body: vec![IrOp::PushI32(1), IrOp::Sub, IrOp::Dup, IrOp::ZeroEq], + }, + ]; + assert_eq!(run_word(&ops), vec![0]); + } + + #[test] + fn execute_return_stack() { + let ops = vec![IrOp::PushI32(42), IrOp::ToR, IrOp::PushI32(99), IrOp::FromR]; + assert_eq!(run_word(&ops), vec![42, 99]); + } + + #[test] + fn execute_rfetch() { + let ops = vec![IrOp::PushI32(42), IrOp::ToR, IrOp::RFetch, IrOp::FromR]; + assert_eq!(run_word(&ops), vec![42, 42]); + } + + #[test] + fn execute_nip() { + assert_eq!( + run_word(&[IrOp::PushI32(1), IrOp::PushI32(2), IrOp::Nip]), + vec![2] + ); + } + + #[test] + fn execute_tuck() { + // ( 1 2 -- 2 1 2 ) + assert_eq!( + run_word(&[IrOp::PushI32(1), IrOp::PushI32(2), IrOp::Tuck]), + vec![2, 1, 2], + ); + } + + #[test] + fn execute_plus_store() { + let ops = vec![ + IrOp::PushI32(10), + IrOp::PushI32(0x100), + IrOp::Store, + IrOp::PushI32(5), + IrOp::PushI32(0x100), + IrOp::PlusStore, + IrOp::PushI32(0x100), + IrOp::Fetch, + ]; + assert_eq!(run_word(&ops), vec![15]); + } + + #[test] + fn execute_complex_expression() { + // (3 + 4) * 2 = 14 + let ops = vec![ + IrOp::PushI32(3), + IrOp::PushI32(4), + IrOp::Add, + 
IrOp::PushI32(2), + IrOp::Mul, + ]; + assert_eq!(run_word(&ops), vec![14]); } } diff --git a/crates/core/src/dictionary.rs b/crates/core/src/dictionary.rs index a367efd..883fe64 100644 --- a/crates/core/src/dictionary.rs +++ b/crates/core/src/dictionary.rs @@ -297,9 +297,7 @@ impl Dictionary { /// Toggle the IMMEDIATE flag on the most recent word. pub fn toggle_immediate(&mut self) -> WaferResult<()> { if self.latest == 0 && self.here == DICTIONARY_BASE { - return Err(WaferError::CompileError( - "no word defined yet".to_string(), - )); + return Err(WaferError::CompileError("no word defined yet".to_string())); } let flags_addr = (self.latest + 4) as usize; if flags_addr >= self.memory.len() { diff --git a/crates/core/src/outer.rs b/crates/core/src/outer.rs index 8cf19a1..48e6012 100644 --- a/crates/core/src/outer.rs +++ b/crates/core/src/outer.rs @@ -8,17 +8,1335 @@ //! 5. If number: push (interpret) or compile as literal (compile mode) //! 6. If neither: error -// TODO: Step 8 - Outer interpreter implementation -// - Tokenizer (whitespace splitting, string literals) -// - Number parsing (decimal, #decimal, $hex, %binary per Forth 2012) -// - Main interpret/compile dispatch loop -// - STATE management -// - EVALUATE support (nested interpretation) +use std::sync::{Arc, Mutex}; + +use wasmtime::{ + Engine, Func, FuncType, Global, Instance, Memory, Module, Mutability, Ref, RefType, Store, + Table, Val, ValType, +}; + +use crate::codegen::{CodegenConfig, CompiledModule, compile_word}; +use crate::dictionary::{Dictionary, WordId}; +use crate::ir::IrOp; +use crate::memory::{CELL_SIZE, DATA_STACK_TOP, RETURN_STACK_TOP}; + +// --------------------------------------------------------------------------- +// Control-flow compilation state +// --------------------------------------------------------------------------- + +/// Control-flow entry on the compile-time control stack. 
+#[derive(Debug)] +enum ControlEntry { + If { + then_body: Vec, + }, + IfElse { + then_body: Vec, + else_body: Vec, + }, + Do { + body: Vec, + }, + Begin { + body: Vec, + }, + BeginWhile { + test: Vec, + body: Vec, + }, +} + +// --------------------------------------------------------------------------- +// VM state stored in the wasmtime Store +// --------------------------------------------------------------------------- + +/// Host-side state accessible from WASM callbacks. +struct VmHost { + #[allow(dead_code)] + output: Arc>, +} + +// --------------------------------------------------------------------------- +// ForthVM +// --------------------------------------------------------------------------- + +/// The complete Forth virtual machine -- owns dictionary, WASM runtime, and state. +pub struct ForthVM { + dictionary: Dictionary, + engine: Engine, + store: Store, + memory: Memory, + table: Table, + dsp: Global, + rsp: Global, + /// 0 = interpreting, -1 = compiling + state: i32, + /// Number base (default 10) + base: u32, + input_buffer: String, + input_pos: usize, + // Compilation state + compiling_name: Option, + compiling_ir: Vec, + control_stack: Vec, + compiling_word_id: Option, + // Output buffer + output: Arc>, + // Next table index (mirrors dictionary.next_fn_index conceptually, + // but we track what's actually in the wasmtime table) + next_table_index: u32, + // The emit function (shared across all instantiated modules) + emit_func: Func, + // Dot (print number) function -- kept for potential future use + #[allow(dead_code)] + dot_func: Func, +} + +impl ForthVM { + /// Boot a new Forth VM with all primitives registered. 
+    pub fn new() -> anyhow::Result<Self> {
+        let engine = Engine::default();
+        let output = Arc::new(Mutex::new(String::new()));
+
+        let host = VmHost {
+            output: Arc::clone(&output),
+        };
+        let mut store = Store::new(&engine, host);
+
+        // Shared linear memory (16 pages = 1 MiB)
+        let memory = Memory::new(&mut store, wasmtime::MemoryType::new(16, None))?;
+
+        // Data stack pointer global
+        let dsp = Global::new(
+            &mut store,
+            wasmtime::GlobalType::new(ValType::I32, Mutability::Var),
+            Val::I32(DATA_STACK_TOP as i32),
+        )?;
+
+        // Return stack pointer global
+        let rsp = Global::new(
+            &mut store,
+            wasmtime::GlobalType::new(ValType::I32, Mutability::Var),
+            Val::I32(RETURN_STACK_TOP as i32),
+        )?;
+
+        // Function table (initial 256 entries)
+        let table = Table::new(
+            &mut store,
+            wasmtime::TableType::new(RefType::FUNCREF, 256, None),
+            Ref::Func(None),
+        )?;
+
+        // Create emit host function: (i32) -> ()
+        let out_ref = Arc::clone(&output);
+        let emit_func = Func::new(
+            &mut store,
+            FuncType::new(&engine, [ValType::I32], []),
+            move |_caller, params, _results| {
+                // Only the low byte is kept, so every emitted value maps to
+                // a char in U+0000..=U+00FF.
+                let ch = params[0].unwrap_i32() as u8 as char;
+                out_ref.lock().unwrap().push(ch);
+                Ok(())
+            },
+        );
+
+        // Create dot host function: (i32) -> ()
+        // It takes the number as a parameter, formats it in decimal followed
+        // by a space, and appends it to the output buffer.
+        let out_ref2 = Arc::clone(&output);
+        let dot_func = Func::new(
+            &mut store,
+            FuncType::new(&engine, [ValType::I32], []),
+            move |_caller, params, _results| {
+                let n = params[0].unwrap_i32();
+                let s = format!("{n} ");
+                out_ref2.lock().unwrap().push_str(&s);
+                Ok(())
+            },
+        );
+
+        let dictionary = Dictionary::new();
+
+        let mut vm = ForthVM {
+            dictionary,
+            engine,
+            store,
+            memory,
+            table,
+            dsp,
+            rsp,
+            state: 0,
+            base: 10,
+            input_buffer: String::new(),
+            input_pos: 0,
+            compiling_name: None,
+            compiling_ir: Vec::new(),
+            control_stack: Vec::new(),
+            compiling_word_id: None,
+            output,
+            next_table_index: 0,
+            emit_func,
+            dot_func,
+        };
+
+        vm.register_primitives()?;
+
+        Ok(vm)
+    }
+
+    /// Evaluate a line of Forth input.
+    ///
+    /// Tokens are processed in order; the first failing token aborts the
+    /// line and its error is returned. Interpreter/compiler state is kept
+    /// between calls, so a colon definition may span several calls.
+    pub fn evaluate(&mut self, input: &str) -> anyhow::Result<()> {
+        // Reuse the existing buffer allocation instead of building a new String.
+        self.input_buffer.clear();
+        self.input_buffer.push_str(input);
+        self.input_pos = 0;
+
+        while let Some(token) = self.next_token() {
+            self.interpret_token(&token)?;
+        }
+
+        Ok(())
+    }
+
+    /// Check if the VM is currently in compile mode.
+    pub fn is_compiling(&self) -> bool {
+        self.state != 0
+    }
+
+    /// Get and clear the output buffer.
+    pub fn take_output(&mut self) -> String {
+        // Swap the buffer out rather than cloning it and then clearing it.
+        std::mem::take(&mut *self.output.lock().unwrap())
+    }
+
+    /// Read the current data stack contents (top-first).
+    pub fn data_stack(&mut self) -> Vec<i32> {
+        let sp = self.dsp.get(&mut self.store).unwrap_i32() as u32;
+        let data = self.memory.data(&self.store);
+        let mut stack = Vec::new();
+        // The stack grows downwards: `sp` addresses the top cell and
+        // DATA_STACK_TOP is one past the bottom cell.
+        let mut addr = sp;
+        while addr < DATA_STACK_TOP {
+            let b: [u8; 4] = data[addr as usize..addr as usize + 4].try_into().unwrap();
+            stack.push(i32::from_le_bytes(b));
+            addr += CELL_SIZE;
+        }
+        stack
+    }
+
+    // -----------------------------------------------------------------------
+    // Internal: tokenizer
+    // -----------------------------------------------------------------------
+
+    /// Read the next whitespace-delimited token from the input buffer.
+ fn next_token(&mut self) -> Option { + let bytes = self.input_buffer.as_bytes(); + // Skip whitespace + while self.input_pos < bytes.len() && bytes[self.input_pos].is_ascii_whitespace() { + self.input_pos += 1; + } + if self.input_pos >= bytes.len() { + return None; + } + let start = self.input_pos; + while self.input_pos < bytes.len() && !bytes[self.input_pos].is_ascii_whitespace() { + self.input_pos += 1; + } + Some(String::from_utf8_lossy(&bytes[start..self.input_pos]).to_string()) + } + + /// Read from the input buffer until the given delimiter character. + /// Returns the collected string (not including the delimiter). + fn parse_until(&mut self, delim: char) -> Option { + let bytes = self.input_buffer.as_bytes(); + // Skip one leading space if present + if self.input_pos < bytes.len() && bytes[self.input_pos] == b' ' { + self.input_pos += 1; + } + let start = self.input_pos; + while self.input_pos < bytes.len() && bytes[self.input_pos] != delim as u8 { + self.input_pos += 1; + } + if self.input_pos > start || self.input_pos < bytes.len() { + let result = String::from_utf8_lossy(&bytes[start..self.input_pos]).to_string(); + // Skip past the delimiter + if self.input_pos < bytes.len() { + self.input_pos += 1; + } + Some(result) + } else { + None + } + } + + // ----------------------------------------------------------------------- + // Internal: interpret/compile dispatch + // ----------------------------------------------------------------------- + + /// Process a single token in the current mode (interpret or compile). 
+ fn interpret_token(&mut self, token: &str) -> anyhow::Result<()> { + let token_upper = token.to_ascii_uppercase(); + + // Handle colon definition start + if token_upper == ":" { + return self.start_colon_def(); + } + + // Handle semicolon + if token_upper == ";" { + if self.state == 0 { + anyhow::bail!("unexpected ;"); + } + return self.finish_colon_def(); + } + + if self.state != 0 { + // Compile mode + self.compile_token(token)?; + } else { + // Interpret mode + self.interpret_token_immediate(token)?; + } + + Ok(()) + } + + /// Interpret a token in immediate (interpret) mode. + fn interpret_token_immediate(&mut self, token: &str) -> anyhow::Result<()> { + // Special handling for string literals in interpret mode + let token_upper = token.to_ascii_uppercase(); + if token_upper == ".\"" { + // Parse until closing quote and print + if let Some(s) = self.parse_until('"') { + self.output.lock().unwrap().push_str(&s); + } + return Ok(()); + } + if token_upper == "(" { + // Comment -- skip until ) + self.parse_until(')'); + return Ok(()); + } + if token_upper == "\\" { + // Line comment -- skip rest of input + self.input_pos = self.input_buffer.len(); + return Ok(()); + } + + // Look up in dictionary + if let Some((_addr, word_id, _is_immediate)) = self.dictionary.find(token) { + self.execute_word(word_id)?; + return Ok(()); + } + + // Try to parse as number + if let Some(n) = self.parse_number(token) { + self.push_data_stack(n)?; + return Ok(()); + } + + anyhow::bail!("unknown word: {}", token); + } + + /// Compile a token in compile mode. 
+ fn compile_token(&mut self, token: &str) -> anyhow::Result<()> { + let token_upper = token.to_ascii_uppercase(); + + // Handle string literals in compile mode + if token_upper == ".\"" { + // Parse until closing quote, emit characters as EMIT calls + if let Some(s) = self.parse_until('"') { + for ch in s.chars() { + self.push_ir(IrOp::PushI32(ch as i32)); + self.push_ir(IrOp::Emit); + } + } + return Ok(()); + } + if token_upper == "S\"" { + // TODO: string literal on stack + self.parse_until('"'); + return Ok(()); + } + if token_upper == "(" { + self.parse_until(')'); + return Ok(()); + } + if token_upper == "\\" { + self.input_pos = self.input_buffer.len(); + return Ok(()); + } + + // Check control flow words (these are handled structurally) + match token_upper.as_str() { + "IF" => return self.compile_if(), + "ELSE" => return self.compile_else(), + "THEN" => return self.compile_then(), + "DO" => return self.compile_do(), + "LOOP" => return self.compile_loop(false), + "+LOOP" => return self.compile_loop(true), + "BEGIN" => return self.compile_begin(), + "UNTIL" => return self.compile_until(), + "WHILE" => return self.compile_while(), + "REPEAT" => return self.compile_repeat(), + "RECURSE" => { + if let Some(word_id) = self.compiling_word_id { + self.push_ir(IrOp::Call(word_id)); + } + return Ok(()); + } + "EXIT" => { + self.push_ir(IrOp::Exit); + return Ok(()); + } + "[" => { + self.state = 0; + return Ok(()); + } + "]" => { + self.state = -1; + return Ok(()); + } + "LITERAL" => { + // compile-time: pop from data stack, compile as literal + let stack = self.data_stack(); + if let Some(&n) = stack.first() { + self.pop_data_stack()?; + self.push_ir(IrOp::PushI32(n)); + } + return Ok(()); + } + "POSTPONE" => { + // Read next token, compile a call to it + if let Some(next) = self.next_token() { + if let Some((_addr, word_id, _imm)) = self.dictionary.find(&next) { + self.push_ir(IrOp::Call(word_id)); + } else { + anyhow::bail!("POSTPONE: unknown word: {}", next); + } 
+ } + return Ok(()); + } + _ => {} + } + + // Look up in dictionary + if let Some((_addr, word_id, is_immediate)) = self.dictionary.find(token) { + if is_immediate { + // Execute immediately even in compile mode + self.execute_word(word_id)?; + } else { + self.push_ir(IrOp::Call(word_id)); + } + return Ok(()); + } + + // Try to parse as number + if let Some(n) = self.parse_number(token) { + self.push_ir(IrOp::PushI32(n)); + return Ok(()); + } + + anyhow::bail!("unknown word: {}", token); + } + + // ----------------------------------------------------------------------- + // Control flow compilation + // ----------------------------------------------------------------------- + + fn compile_if(&mut self) -> anyhow::Result<()> { + // Save current IR and start collecting then_body + let saved = std::mem::take(&mut self.compiling_ir); + self.control_stack.push(ControlEntry::If { + then_body: Vec::new(), + }); + // The saved IR goes back as the "outer" compiling_ir -- but we need a + // different approach. Let's store the prefix in the control entry and + // make compiling_ir the then_body. + // Actually, the right pattern: we push a frame, and the current IR + // becomes the prefix. When THEN is reached, we pop the frame, build + // the IrOp::If, and append it to the prefix. + + // Put the prefix aside in the control entry itself. + // We'll repurpose: then_body starts empty (will be compiling_ir from now on). + // The prefix (current compiling_ir) is stashed. + // On THEN, we pop the control entry, take compiling_ir as then_body, + // restore the prefix, and append If{then_body, else_body}. + + // Let me restructure: use a separate prefix stack. + // Actually the simplest approach: stash the current compiling_ir into + // the control entry, and start fresh for the then_body. 
+ self.control_stack.pop(); // remove the one we just pushed + self.control_stack.push(ControlEntry::If { + then_body: saved, // this is actually the prefix + }); + // compiling_ir is now empty and will collect the then_body + Ok(()) + } + + fn compile_else(&mut self) -> anyhow::Result<()> { + match self.control_stack.pop() { + Some(ControlEntry::If { then_body: prefix }) => { + // compiling_ir has the then_body ops + let then_body = std::mem::take(&mut self.compiling_ir); + self.control_stack.push(ControlEntry::IfElse { + then_body, + else_body: prefix, // stash prefix as else_body temporarily + }); + // compiling_ir is now empty and will collect the else_body + } + _ => anyhow::bail!("ELSE without matching IF"), + } + Ok(()) + } + + fn compile_then(&mut self) -> anyhow::Result<()> { + match self.control_stack.pop() { + Some(ControlEntry::If { then_body: prefix }) => { + // compiling_ir has the then_body ops + let then_body = std::mem::take(&mut self.compiling_ir); + // Restore prefix and append the If node + self.compiling_ir = prefix; + self.compiling_ir.push(IrOp::If { + then_body, + else_body: None, + }); + } + Some(ControlEntry::IfElse { + then_body, + else_body: prefix, + }) => { + // compiling_ir has the else_body ops + let else_body = std::mem::take(&mut self.compiling_ir); + self.compiling_ir = prefix; + self.compiling_ir.push(IrOp::If { + then_body, + else_body: Some(else_body), + }); + } + _ => anyhow::bail!("THEN without matching IF"), + } + Ok(()) + } + + fn compile_do(&mut self) -> anyhow::Result<()> { + let prefix = std::mem::take(&mut self.compiling_ir); + self.control_stack.push(ControlEntry::Do { body: prefix }); + Ok(()) + } + + fn compile_loop(&mut self, is_plus_loop: bool) -> anyhow::Result<()> { + match self.control_stack.pop() { + Some(ControlEntry::Do { body: prefix }) => { + let body = std::mem::take(&mut self.compiling_ir); + self.compiling_ir = prefix; + self.compiling_ir.push(IrOp::DoLoop { body, is_plus_loop }); + } + _ => 
anyhow::bail!("LOOP without matching DO"), + } + Ok(()) + } + + fn compile_begin(&mut self) -> anyhow::Result<()> { + let prefix = std::mem::take(&mut self.compiling_ir); + self.control_stack + .push(ControlEntry::Begin { body: prefix }); + Ok(()) + } + + fn compile_until(&mut self) -> anyhow::Result<()> { + match self.control_stack.pop() { + Some(ControlEntry::Begin { body: prefix }) => { + let body = std::mem::take(&mut self.compiling_ir); + self.compiling_ir = prefix; + self.compiling_ir.push(IrOp::BeginUntil { body }); + } + _ => anyhow::bail!("UNTIL without matching BEGIN"), + } + Ok(()) + } + + fn compile_while(&mut self) -> anyhow::Result<()> { + match self.control_stack.pop() { + Some(ControlEntry::Begin { body: prefix }) => { + let test = std::mem::take(&mut self.compiling_ir); + self.control_stack.push(ControlEntry::BeginWhile { + test, + body: prefix, // stash prefix + }); + // compiling_ir now empty, collects the body + } + _ => anyhow::bail!("WHILE without matching BEGIN"), + } + Ok(()) + } + + fn compile_repeat(&mut self) -> anyhow::Result<()> { + match self.control_stack.pop() { + Some(ControlEntry::BeginWhile { test, body: prefix }) => { + let body = std::mem::take(&mut self.compiling_ir); + self.compiling_ir = prefix; + self.compiling_ir + .push(IrOp::BeginWhileRepeat { test, body }); + } + _ => anyhow::bail!("REPEAT without matching BEGIN...WHILE"), + } + Ok(()) + } + + // ----------------------------------------------------------------------- + // Colon definition + // ----------------------------------------------------------------------- + + fn start_colon_def(&mut self) -> anyhow::Result<()> { + if self.state != 0 { + anyhow::bail!("nested colon definitions not allowed"); + } + let name = self + .next_token() + .ok_or_else(|| anyhow::anyhow!("expected word name after :"))?; + + // Create the dictionary entry (hidden until ; reveals it) + let word_id = self + .dictionary + .create(&name, false) + .map_err(|e| anyhow::anyhow!("{}", e))?; + + 
self.compiling_name = Some(name); + self.compiling_word_id = Some(word_id); + self.compiling_ir.clear(); + self.control_stack.clear(); + self.state = -1; + self.next_table_index = self.next_table_index.max(word_id.0 + 1); + + Ok(()) + } + + fn finish_colon_def(&mut self) -> anyhow::Result<()> { + if self.state == 0 { + anyhow::bail!("not in compile mode"); + } + if !self.control_stack.is_empty() { + anyhow::bail!("unresolved control structure"); + } + + let name = self + .compiling_name + .take() + .ok_or_else(|| anyhow::anyhow!("no word being compiled"))?; + let word_id = self + .compiling_word_id + .take() + .ok_or_else(|| anyhow::anyhow!("no word being compiled"))?; + let ir = std::mem::take(&mut self.compiling_ir); + + // Compile to WASM + let config = CodegenConfig { + base_fn_index: word_id.0, + table_size: self.table_size(), + }; + let compiled = compile_word(&name, &ir, &config) + .map_err(|e| anyhow::anyhow!("codegen error: {}", e))?; + + // Instantiate and install in the table + self.instantiate_and_install(&compiled, word_id)?; + + // Reveal the word + self.dictionary.reveal(); + self.state = 0; + + Ok(()) + } + + // ----------------------------------------------------------------------- + // WASM instantiation + // ----------------------------------------------------------------------- + + /// Get the current table size. + fn table_size(&self) -> u32 { + self.table.size(&self.store) as u32 + } + + /// Ensure the table is large enough for the given index. + fn ensure_table_size(&mut self, needed: u32) -> anyhow::Result<()> { + let current = self.table.size(&self.store); + let needed64 = needed as u64; + if needed64 >= current { + let grow_by = needed64 - current + 1; + self.table.grow(&mut self.store, grow_by, Ref::Func(None))?; + } + Ok(()) + } + + /// Instantiate a compiled WASM module and install its function in the table. 
+ fn instantiate_and_install( + &mut self, + compiled: &CompiledModule, + word_id: WordId, + ) -> anyhow::Result<()> { + self.ensure_table_size(word_id.0)?; + + let module = Module::new(&self.engine, &compiled.bytes)?; + let instance = Instance::new( + &mut self.store, + &module, + &[ + self.emit_func.into(), + self.memory.into(), + self.dsp.into(), + self.rsp.into(), + self.table.into(), + ], + )?; + + // Get the exported function and install it in our shared table + let func = instance + .get_func(&mut self.store, "fn") + .ok_or_else(|| anyhow::anyhow!("compiled module missing 'fn' export"))?; + + self.table + .set(&mut self.store, word_id.0 as u64, Ref::Func(Some(func)))?; + + Ok(()) + } + + // ----------------------------------------------------------------------- + // Word execution + // ----------------------------------------------------------------------- + + /// Execute a word by its WordId (calls through the function table). + fn execute_word(&mut self, word_id: WordId) -> anyhow::Result<()> { + let r = self + .table + .get(&mut self.store, word_id.0 as u64) + .ok_or_else(|| anyhow::anyhow!("word {} not in function table", word_id.0))?; + let func = *r + .unwrap_func() + .ok_or_else(|| anyhow::anyhow!("word {} is null funcref", word_id.0))?; + + func.call(&mut self.store, &[], &mut [])?; + Ok(()) + } + + // ----------------------------------------------------------------------- + // Data stack operations + // ----------------------------------------------------------------------- + + /// Push a value onto the data stack. + fn push_data_stack(&mut self, value: i32) -> anyhow::Result<()> { + let sp = self.dsp.get(&mut self.store).unwrap_i32() as u32; + let new_sp = sp - CELL_SIZE; + let data = self.memory.data_mut(&mut self.store); + let bytes = value.to_le_bytes(); + data[new_sp as usize..new_sp as usize + 4].copy_from_slice(&bytes); + self.dsp.set(&mut self.store, Val::I32(new_sp as i32))?; + Ok(()) + } + + /// Pop a value from the data stack. 
+    fn pop_data_stack(&mut self) -> anyhow::Result<i32> {
+        let sp = self.dsp.get(&mut self.store).unwrap_i32() as u32;
+        // `sp == DATA_STACK_TOP` means the stack is empty.
+        if sp >= DATA_STACK_TOP {
+            anyhow::bail!("stack underflow");
+        }
+        let data = self.memory.data(&self.store);
+        let b: [u8; 4] = data[sp as usize..sp as usize + 4].try_into().unwrap();
+        let value = i32::from_le_bytes(b);
+        self.dsp
+            .set(&mut self.store, Val::I32((sp + CELL_SIZE) as i32))?;
+        Ok(value)
+    }
+
+    // -----------------------------------------------------------------------
+    // Number parsing
+    // -----------------------------------------------------------------------
+
+    /// Try to parse a token as a number.
+    ///
+    /// Accepted forms: digits in the current base, or a `$` (hex), `#`
+    /// (decimal) or `%` (binary) prefix followed by digits. A single `-` may
+    /// appear either before the prefix or directly after it. Values must fit
+    /// a 32-bit cell: `-2^31 ..= 2^32 - 1`, with values above `i32::MAX`
+    /// wrapping to their two's-complement representation. Anything else
+    /// yields `None`.
+    fn parse_number(&self, token: &str) -> Option<i32> {
+        let token = token.trim();
+
+        // Optional '-' in front of everything (also in front of a radix prefix).
+        let (mut negative, rest) = match token.strip_prefix('-') {
+            Some(stripped) => (true, stripped),
+            None => (false, token),
+        };
+
+        // Radix prefix, falling back to the current base.
+        let (radix, rest) = if let Some(hex) = rest.strip_prefix('$') {
+            (16, hex)
+        } else if let Some(dec) = rest.strip_prefix('#') {
+            (10, dec)
+        } else if let Some(bin) = rest.strip_prefix('%') {
+            (2, bin)
+        } else {
+            (self.base, rest)
+        };
+
+        // `from_str_radix` and `is_digit` panic outside this range.
+        if !(2..=36).contains(&radix) {
+            return None;
+        }
+
+        // Optional '-' directly after the prefix (`$-FF`), but never two signs.
+        let digits = match rest.strip_prefix('-') {
+            Some(_) if negative => return None,
+            Some(stripped) => {
+                negative = true;
+                stripped
+            }
+            None => rest,
+        };
+
+        // Only digits may remain; this also rejects the embedded `+`/`-`
+        // that `from_str_radix` would otherwise accept.
+        if digits.is_empty() || !digits.chars().all(|c| c.is_digit(radix)) {
+            return None;
+        }
+
+        // Overflowing i64 yields `None`.
+        let magnitude = i64::from_str_radix(digits, radix).ok()?;
+        // Negate in 64 bits so that `-2147483648` cannot overflow.
+        let value = if negative { -magnitude } else { magnitude };
+
+        if value < i64::from(i32::MIN) || value > i64::from(u32::MAX) {
+            return None;
+        }
+        // Intentional wrap: 0x8000_0000..=0xFFFF_FFFF map onto negative cells.
+        Some(value as i32)
+    }
+
+    // -----------------------------------------------------------------------
+    // Push IR to the active body
+    // -----------------------------------------------------------------------
+
+    /// Push an IR op into the current compilation target.
+    fn push_ir(&mut self, op: IrOp) {
+        self.compiling_ir.push(op);
+    }
+
+    // -----------------------------------------------------------------------
+    // Primitive registration
+    // -----------------------------------------------------------------------
+
+    /// Register a primitive word by compiling its IR body and installing it.
+    fn register_primitive(
+        &mut self,
+        name: &str,
+        immediate: bool,
+        ir_body: Vec<IrOp>,
+    ) -> anyhow::Result<WordId> {
+        // Create the dictionary entry first: its id is the table slot the
+        // compiled function is installed into.
+        let word_id = self
+            .dictionary
+            .create(name, immediate)
+            .map_err(|e| anyhow::anyhow!("{}", e))?;
+
+        let config = CodegenConfig {
+            base_fn_index: word_id.0,
+            table_size: self.table_size(),
+        };
+        let compiled = compile_word(name, &ir_body, &config)
+            .map_err(|e| anyhow::anyhow!("codegen error for {}: {}", name, e))?;
+
+        self.instantiate_and_install(&compiled, word_id)?;
+        // Reveal only after the function is installed.
+        self.dictionary.reveal();
+        self.next_table_index = self.next_table_index.max(word_id.0 + 1);
+
+        Ok(word_id)
+    }
+
+    /// Register a primitive whose implementation is a host function (not IR-compiled).
+    fn register_host_primitive(
+        &mut self,
+        name: &str,
+        immediate: bool,
+        func: Func,
+    ) -> anyhow::Result<WordId> {
+        let word_id = self
+            .dictionary
+            .create(name, immediate)
+            .map_err(|e| anyhow::anyhow!("{}", e))?;
+
+        // No module to instantiate: the host function goes straight into the table.
+        self.ensure_table_size(word_id.0)?;
+        self.table
+            .set(&mut self.store, word_id.0 as u64, Ref::Func(Some(func)))?;
+        self.dictionary.reveal();
+        self.next_table_index = self.next_table_index.max(word_id.0 + 1);
+
+        Ok(word_id)
+    }
+
+    /// Register all built-in primitive words.
+ fn register_primitives(&mut self) -> anyhow::Result<()> { + // -- Stack manipulation -- + self.register_primitive("DUP", false, vec![IrOp::Dup])?; + self.register_primitive("DROP", false, vec![IrOp::Drop])?; + self.register_primitive("SWAP", false, vec![IrOp::Swap])?; + self.register_primitive("OVER", false, vec![IrOp::Over])?; + self.register_primitive("ROT", false, vec![IrOp::Rot])?; + self.register_primitive("NIP", false, vec![IrOp::Nip])?; + self.register_primitive("TUCK", false, vec![IrOp::Tuck])?; + + // -- Arithmetic -- + self.register_primitive("+", false, vec![IrOp::Add])?; + self.register_primitive("-", false, vec![IrOp::Sub])?; + self.register_primitive("*", false, vec![IrOp::Mul])?; + self.register_primitive("/MOD", false, vec![IrOp::DivMod])?; + self.register_primitive("NEGATE", false, vec![IrOp::Negate])?; + self.register_primitive("ABS", false, vec![IrOp::Abs])?; + // / and MOD in terms of /MOD + self.register_primitive("/", false, vec![IrOp::DivMod, IrOp::Swap, IrOp::Drop])?; + self.register_primitive("MOD", false, vec![IrOp::DivMod, IrOp::Drop])?; + + // -- Comparison -- + self.register_primitive("=", false, vec![IrOp::Eq])?; + self.register_primitive("<>", false, vec![IrOp::NotEq])?; + self.register_primitive("<", false, vec![IrOp::Lt])?; + self.register_primitive(">", false, vec![IrOp::Gt])?; + self.register_primitive("U<", false, vec![IrOp::LtUnsigned])?; + self.register_primitive("0=", false, vec![IrOp::ZeroEq])?; + self.register_primitive("0<", false, vec![IrOp::ZeroLt])?; + + // -- Logic -- + self.register_primitive("AND", false, vec![IrOp::And])?; + self.register_primitive("OR", false, vec![IrOp::Or])?; + self.register_primitive("XOR", false, vec![IrOp::Xor])?; + self.register_primitive("INVERT", false, vec![IrOp::Invert])?; + self.register_primitive("LSHIFT", false, vec![IrOp::Lshift])?; + self.register_primitive("RSHIFT", false, vec![IrOp::Rshift])?; + + // -- Memory -- + self.register_primitive("@", false, vec![IrOp::Fetch])?; + 
self.register_primitive("!", false, vec![IrOp::Store])?; + self.register_primitive("C@", false, vec![IrOp::CFetch])?; + self.register_primitive("C!", false, vec![IrOp::CStore])?; + self.register_primitive("+!", false, vec![IrOp::PlusStore])?; + + // -- Return stack -- + self.register_primitive(">R", false, vec![IrOp::ToR])?; + self.register_primitive("R>", false, vec![IrOp::FromR])?; + self.register_primitive("R@", false, vec![IrOp::RFetch])?; + + // -- I/O -- + self.register_primitive("EMIT", false, vec![IrOp::Emit])?; + self.register_primitive("CR", false, vec![IrOp::Cr])?; + + // -- Constants -- + self.register_primitive("TRUE", false, vec![IrOp::PushI32(-1)])?; + self.register_primitive("FALSE", false, vec![IrOp::PushI32(0)])?; + self.register_primitive("BL", false, vec![IrOp::PushI32(32)])?; + self.register_primitive("SPACE", false, vec![IrOp::PushI32(32), IrOp::Emit])?; + + // -- 1+ 1- 2* 2/ -- + self.register_primitive("1+", false, vec![IrOp::PushI32(1), IrOp::Add])?; + self.register_primitive("1-", false, vec![IrOp::PushI32(1), IrOp::Sub])?; + self.register_primitive("2*", false, vec![IrOp::PushI32(1), IrOp::Lshift])?; + self.register_primitive("2/", false, vec![IrOp::PushI32(1), IrOp::Rshift])?; + + // -- I/O: . (dot) needs host function because it does number-to-string -- + // We'll compile a word that pops and calls a host function. + // The simplest approach: make DOT a host function that reads the stack + // directly via memory + dsp. + self.register_dot()?; + + // -- .S (print stack) -- + self.register_dot_s()?; + + // -- DEPTH -- + self.register_depth()?; + + Ok(()) + } + + /// Register the `.` (dot) word as a host function. 
+ fn register_dot(&mut self) -> anyhow::Result<()> { + let memory = self.memory; + let dsp = self.dsp; + let output = Arc::clone(&self.output); + let base_val = self.base; + + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _params, _results| { + // Read top of data stack + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let data = memory.data(&caller); + let b: [u8; 4] = data[sp as usize..sp as usize + 4].try_into().unwrap(); + let value = i32::from_le_bytes(b); + // Increment dsp (pop) + dsp.set(&mut caller, Val::I32((sp + CELL_SIZE) as i32))?; + // Format number + let s = if base_val == 16 { + if value < 0 { + format!("-{:X} ", -(value as i64)) + } else { + format!("{:X} ", value) + } + } else { + format!("{} ", value) + }; + output.lock().unwrap().push_str(&s); + Ok(()) + }, + ); + + self.register_host_primitive(".", false, func)?; + Ok(()) + } + + /// Register `.S` (print stack without consuming). + fn register_dot_s(&mut self) -> anyhow::Result<()> { + let memory = self.memory; + let dsp = self.dsp; + let output = Arc::clone(&self.output); + + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _params, _results| { + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let data = memory.data(&caller); + let depth = (DATA_STACK_TOP - sp) / CELL_SIZE; + let mut out = output.lock().unwrap(); + out.push_str(&format!("<{}> ", depth)); + // Print from bottom to top + let mut addr = DATA_STACK_TOP - CELL_SIZE; + while addr >= sp { + let b: [u8; 4] = data[addr as usize..addr as usize + 4].try_into().unwrap(); + let v = i32::from_le_bytes(b); + out.push_str(&format!("{} ", v)); + if addr < CELL_SIZE { + break; + } + addr -= CELL_SIZE; + } + Ok(()) + }, + ); + + self.register_host_primitive(".S", false, func)?; + Ok(()) + } + + /// Register DEPTH word. 
+ fn register_depth(&mut self) -> anyhow::Result<()> { + let memory = self.memory; + let dsp_global = self.dsp; + + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _params, _results| { + let sp = dsp_global.get(&mut caller).unwrap_i32() as u32; + let depth = ((DATA_STACK_TOP - sp) / CELL_SIZE) as i32; + // Push depth onto stack + let new_sp = sp - CELL_SIZE; + let data = memory.data_mut(&mut caller); + let bytes = depth.to_le_bytes(); + data[new_sp as usize..new_sp as usize + 4].copy_from_slice(&bytes); + dsp_global.set(&mut caller, Val::I32(new_sp as i32))?; + Ok(()) + }, + ); + + self.register_host_primitive("DEPTH", false, func)?; + Ok(()) + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- #[cfg(test)] mod tests { + use super::*; + + fn eval(input: &str) -> (Vec, String) { + let mut vm = ForthVM::new().unwrap(); + vm.evaluate(input).unwrap(); + let output = vm.take_output(); + let stack = vm.data_stack(); + (stack, output) + } + + fn eval_output(input: &str) -> String { + let (_, output) = eval(input); + output + } + + fn eval_stack(input: &str) -> Vec { + let (stack, _) = eval(input); + stack + } + + // -- Basic stack operations -- + #[test] - fn placeholder() { - // Outer interpreter tests will be added in Step 8 + fn test_push_number() { + assert_eq!(eval_stack("42"), vec![42]); + } + + #[test] + fn test_push_multiple() { + assert_eq!(eval_stack("1 2 3"), vec![3, 2, 1]); + } + + #[test] + fn test_negative_number() { + assert_eq!(eval_stack("-5"), vec![-5]); + } + + #[test] + fn test_hex_number() { + assert_eq!(eval_stack("$FF"), vec![255]); + } + + #[test] + fn test_binary_number() { + assert_eq!(eval_stack("%1010"), vec![10]); + } + + // -- Arithmetic -- + + #[test] + fn test_add() { + assert_eq!(eval_stack("2 3 +"), vec![5]); + } + + #[test] + fn test_sub() { + 
assert_eq!(eval_stack("10 3 -"), vec![7]); + } + + #[test] + fn test_mul() { + assert_eq!(eval_stack("6 7 *"), vec![42]); + } + + #[test] + fn test_div() { + assert_eq!(eval_stack("10 3 /"), vec![3]); + } + + #[test] + fn test_mod() { + assert_eq!(eval_stack("10 3 MOD"), vec![1]); + } + + // -- I/O -- + + #[test] + fn test_dot() { + assert_eq!(eval_output("42 ."), "42 "); + } + + #[test] + fn test_dot_negative() { + assert_eq!(eval_output("-5 ."), "-5 "); + } + + #[test] + fn test_emit() { + assert_eq!(eval_output("65 EMIT"), "A"); + } + + #[test] + fn test_cr() { + assert_eq!(eval_output("CR"), "\n"); + } + + // -- Colon definitions -- + + #[test] + fn test_square() { + assert_eq!(eval_output(": SQUARE DUP * ; 7 SQUARE ."), "49 "); + } + + #[test] + fn test_two_plus_three() { + assert_eq!(eval_output("2 3 + ."), "5 "); + } + + #[test] + fn test_colon_def_with_call() { + assert_eq!( + eval_output(": DOUBLE DUP + ; : QUAD DOUBLE DOUBLE ; 3 QUAD ."), + "12 " + ); + } + + // -- Control flow -- + + #[test] + fn test_if_then() { + assert_eq!(eval_output(": TEST 1 > IF 42 THEN ; 5 TEST ."), "42 "); + } + + #[test] + fn test_if_else_then() { + assert_eq!( + eval_output(": ABS2 DUP 0< IF NEGATE THEN ; -5 ABS2 ."), + "5 " + ); + } + + #[test] + fn test_begin_until() { + // Count down from 3, push each value + assert_eq!( + eval_output(": COUNTDOWN BEGIN DUP . 1 - DUP 0= UNTIL DROP ; 3 COUNTDOWN"), + "3 2 1 " + ); + } + + #[test] + fn test_do_loop() { + assert_eq!( + eval_output(": TEST 5 0 DO 42 . 
LOOP ; TEST"), + "42 42 42 42 42 " + ); + } + + // -- Recursion -- + + #[test] + fn test_factorial() { + assert_eq!( + eval_output(": FACT DUP 1 > IF DUP 1 - RECURSE * THEN ; 5 FACT ."), + "120 " + ); + } + + // -- Comments -- + + #[test] + fn test_paren_comment() { + assert_eq!(eval_stack("1 ( this is a comment ) 2"), vec![2, 1]); + } + + #[test] + fn test_backslash_comment() { + assert_eq!(eval_stack("1 2 \\ this is ignored"), vec![2, 1]); + } + + // -- String output -- + + #[test] + fn test_dot_quote() { + assert_eq!(eval_output(".\" Hello World\""), "Hello World"); + } + + // -- Stack words -- + + #[test] + fn test_dup() { + assert_eq!(eval_stack("5 DUP"), vec![5, 5]); + } + + #[test] + fn test_drop() { + assert_eq!(eval_stack("1 2 DROP"), vec![1]); + } + + #[test] + fn test_swap() { + assert_eq!(eval_stack("1 2 SWAP"), vec![1, 2]); + } + + #[test] + fn test_over() { + assert_eq!(eval_stack("1 2 OVER"), vec![1, 2, 1]); + } + + #[test] + fn test_rot() { + // ( 1 2 3 -- 2 3 1 ) top-first: [1, 3, 2] + assert_eq!(eval_stack("1 2 3 ROT"), vec![1, 3, 2]); + } + + // -- Comparison -- + + #[test] + fn test_eq() { + assert_eq!(eval_stack("5 5 ="), vec![-1]); + assert_eq!(eval_stack("3 5 ="), vec![0]); + } + + #[test] + fn test_less_than() { + assert_eq!(eval_stack("3 5 <"), vec![-1]); + assert_eq!(eval_stack("5 3 <"), vec![0]); + } + + #[test] + fn test_greater_than() { + assert_eq!(eval_stack("5 3 >"), vec![-1]); + assert_eq!(eval_stack("3 5 >"), vec![0]); + } + + // -- Logic -- + + #[test] + fn test_and() { + assert_eq!(eval_stack("$FF $0F AND"), vec![0x0F]); + } + + #[test] + fn test_or() { + assert_eq!(eval_stack("$F0 $0F OR"), vec![0xFF]); + } + + #[test] + fn test_invert() { + assert_eq!(eval_stack("0 INVERT"), vec![-1]); + } + + // -- Constants -- + + #[test] + fn test_true_false() { + assert_eq!(eval_stack("TRUE"), vec![-1]); + assert_eq!(eval_stack("FALSE"), vec![0]); + } + + #[test] + fn test_bl() { + assert_eq!(eval_stack("BL"), vec![32]); + } + + // -- 
Complex programs -- + + #[test] + fn test_fibonacci() { + assert_eq!( + eval_output(": FIB DUP 1 > IF DUP 1 - RECURSE SWAP 2 - RECURSE + THEN ; 10 FIB ."), + "55 " + ); + } + + #[test] + fn test_begin_while_repeat() { + assert_eq!( + eval_output(": COUNTDOWN BEGIN DUP WHILE DUP . 1 - REPEAT DROP ; 3 COUNTDOWN"), + "3 2 1 " + ); + } + + #[test] + fn test_nested_if() { + assert_eq!( + eval_output( + ": CLASSIFY DUP 0< IF DROP .\" neg\" ELSE 0= IF .\" zero\" ELSE .\" pos\" THEN THEN ; -1 CLASSIFY" + ), + "neg" + ); + } + + #[test] + fn test_nested_if_zero() { + assert_eq!( + eval_output( + ": CLASSIFY DUP 0< IF DROP .\" neg\" ELSE 0= IF .\" zero\" ELSE .\" pos\" THEN THEN ; 0 CLASSIFY" + ), + "zero" + ); + } + + #[test] + fn test_nested_if_pos() { + assert_eq!( + eval_output( + ": CLASSIFY DUP 0< IF DROP .\" neg\" ELSE 0= IF .\" zero\" ELSE .\" pos\" THEN THEN ; 5 CLASSIFY" + ), + "pos" + ); + } + + // -- Multiple evaluations (simulating REPL) -- + + #[test] + fn test_multi_eval() { + let mut vm = ForthVM::new().unwrap(); + vm.evaluate(": SQUARE DUP * ;").unwrap(); + let _ = vm.take_output(); + vm.evaluate("7 SQUARE .").unwrap(); + assert_eq!(vm.take_output(), "49 "); } }