Files
WAFER/crates/core/src/codegen.rs
T
ok2 4bfe6976ee Fix LEAVE+LOOP hang, DEPTH off-by-one, division flavor, EVALUATE, WORD, ACCEPT
Six fixes for compliance test regressions introduced in Phases 7-8:

- LEAVE + +LOOP with step=0 caused infinite loop: the XOR termination
  check yields 0 when index=limit and step=0. Added SYSVAR_LEAVE_FLAG
  mechanism — LEAVE sets flag, +LOOP checks it, all loops clear on exit.

- DEPTH was off-by-one: `5440 SP@ -` pushed the literal before SP@
  read the stack pointer, making SP@ see one extra cell. Reordered to
  `SP@ 5440 SWAP -` so SP@ reads dsp before any literal push.

- */ and */MOD used FM/MOD (floored) but WAFER's / uses WASM i32.div_s
  (symmetric). Changed to SM/REM for consistency.

- EVALUATE didn't sync input buffer to WASM memory, breaking SOURCE
  and >IN manipulation inside evaluated strings. Added input-only sync
  (without touching STATE/BASE) and >IN readback after each token.

- WORD didn't skip leading spaces when delimiter != space, causing
  GN' and GS3 tests to read whitespace instead of content.

- Added ACCEPT stub returning 0 for non-interactive mode.

- Added bounds check in refresh_user_here to reject corrupted
  SYSVAR_HERE values beyond WASM memory size.

Core and Facility compliance suites now pass. Other suites have
pre-existing regressions from Phases 1-8 still under investigation.
2026-04-07 20:30:16 +02:00

3426 lines
114 KiB
Rust

//! WASM code generation from IR.
//!
//! Translates optimized IR into WASM bytecode using the `wasm-encoder` crate.
//! Stacks live in linear memory. The data-stack pointer (`$dsp`) is cached in
//! a WASM local for the duration of each function, with write-back to the
//! global before calls and at function exit. The return-stack pointer (`$rsp`)
//! remains a global.
use std::borrow::Cow;
use std::collections::HashMap;
use wasm_encoder::{
BlockType, CodeSection, ConstExpr, CustomSection, DataCountSection, DataSection,
ElementSection, Elements, EntityType, ExportKind, ExportSection, Function, FunctionSection,
GlobalType, ImportSection, Instruction, MemArg, MemoryType, Module, RefType, TableType,
TypeSection, ValType,
};
use crate::dictionary::WordId;
use crate::error::{WaferError, WaferResult};
use crate::ir::IrOp;
use crate::memory::{CELL_SIZE, SYSVAR_LEAVE_FLAG};
// ---------------------------------------------------------------------------
// Import indices (order matters: imports numbered sequentially by kind)
// ---------------------------------------------------------------------------
/// Index of the imported memory.
const MEMORY_INDEX: u32 = 0;
/// Index of the `$dsp` global (data stack pointer).
const DSP: u32 = 0;
/// Index of the `$rsp` global (return stack pointer).
const RSP: u32 = 1;
/// Index of the `$fsp` global (float stack pointer).
const FSP: u32 = 2;
/// Index of the imported function table.
const TABLE: u32 = 0;
// Type indices in the type section.
const TYPE_VOID: u32 = 0; // () -> ()
const TYPE_I32: u32 = 1; // (i32) -> ()
// The `emit` callback is the first (and only) imported function, so index 0.
// The compiled word is the first (and only) defined function, so index 1.
const EMIT_FUNC: u32 = 0;
const WORD_FUNC: u32 = 1;
// ---------------------------------------------------------------------------
// DSP caching: local 0 holds a cached copy of the $dsp global.
// Scratch locals start at SCRATCH_BASE (1) instead of 0.
// ---------------------------------------------------------------------------
/// WASM local index for the cached data-stack pointer.
const CACHED_DSP_LOCAL: u32 = 0;
/// First WASM local index available for scratch temporaries.
const SCRATCH_BASE: u32 = 1;
/// Natural-alignment `MemArg` for 4-byte i32 operations.
/// `align` is log2 of the byte alignment, per the WASM binary format.
const MEM4: MemArg = MemArg {
    offset: 0,
    align: 2, // 2^2 = 4
    memory_index: MEMORY_INDEX,
};
/// `MemArg` for single-byte operations.
const MEM1: MemArg = MemArg {
    offset: 0,
    align: 0, // 2^0 = 1
    memory_index: MEMORY_INDEX,
};
/// Natural-alignment `MemArg` for 8-byte f64 operations.
const MEM8: MemArg = MemArg {
    offset: 0,
    align: 3, // 2^3 = 8
    memory_index: MEMORY_INDEX,
};
// ---------------------------------------------------------------------------
// Public types
// ---------------------------------------------------------------------------
/// Configuration for code generation.
///
/// Supplied by the caller once per compilation; the indices describe how the
/// emitted module slots into the already-populated function table.
#[derive(Debug, Clone)]
pub struct CodegenConfig {
    /// Base function index (for the function table).
    pub base_fn_index: u32,
    /// Number of functions already in the table.
    pub table_size: u32,
    /// Enable stack-to-local promotion for straight-line words.
    pub stack_to_local_promotion: bool,
}
/// Result of compiling a word to WASM.
#[derive(Debug, Clone)]
pub struct CompiledModule {
    /// The WASM binary bytes.
    pub bytes: Vec<u8>,
    /// Function index in the table for this word (used by `EXECUTE` and
    /// indirect calls from other compiled words).
    pub fn_index: u32,
}
// ---------------------------------------------------------------------------
// Instruction-level helpers (free functions that take &mut Function)
// ---------------------------------------------------------------------------
/// Emit `dsp -= CELL_SIZE` against the cached data-stack-pointer local
/// (i.e. grow the downward-growing data stack by one cell).
fn dsp_dec(f: &mut Function) {
    let seq = [
        Instruction::LocalGet(CACHED_DSP_LOCAL),
        Instruction::I32Const(CELL_SIZE as i32),
        Instruction::I32Sub,
        Instruction::LocalSet(CACHED_DSP_LOCAL),
    ];
    for ins in &seq {
        f.instruction(ins);
    }
}
/// Emit `dsp += CELL_SIZE` against the cached data-stack-pointer local
/// (i.e. shrink the downward-growing data stack by one cell).
fn dsp_inc(f: &mut Function) {
    let seq = [
        Instruction::LocalGet(CACHED_DSP_LOCAL),
        Instruction::I32Const(CELL_SIZE as i32),
        Instruction::I32Add,
        Instruction::LocalSet(CACHED_DSP_LOCAL),
    ];
    for ins in &seq {
        f.instruction(ins);
    }
}
/// Move an i32 that is currently on the WASM operand stack onto the
/// data stack in linear memory, using `tmp` as a scratch local.
///
/// Emits: `local.set tmp; dsp -= 4; mem[dsp] = local.get tmp`.
fn push_via_local(f: &mut Function, tmp: u32) {
    // Stash the operand-stack value first so dsp arithmetic can run.
    f.instruction(&Instruction::LocalSet(tmp));
    dsp_dec(f);
    // Store the stashed value at the new top-of-stack address.
    f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL));
    f.instruction(&Instruction::LocalGet(tmp));
    f.instruction(&Instruction::I32Store(MEM4));
}
/// Push a compile-time-known i32 constant onto the data stack.
/// No scratch local is needed since the value is an immediate.
fn push_const(f: &mut Function, value: i32) {
    dsp_dec(f);
    let seq = [
        Instruction::LocalGet(CACHED_DSP_LOCAL),
        Instruction::I32Const(value),
        Instruction::I32Store(MEM4),
    ];
    for ins in &seq {
        f.instruction(ins);
    }
}
/// Pop the top data-stack cell onto the WASM operand stack:
/// load `mem[dsp]`, then bump `dsp` up one cell.
fn pop(f: &mut Function) {
    f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL));
    f.instruction(&Instruction::I32Load(MEM4));
    dsp_inc(f);
}
/// Pop the top of the data stack into a local.
fn pop_to(f: &mut Function, local: u32) {
pop(f);
f.instruction(&Instruction::LocalSet(local));
}
/// Load the current top-of-stack cell onto the WASM operand stack
/// without adjusting the stack pointer.
fn peek(f: &mut Function) {
    f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL));
    f.instruction(&Instruction::I32Load(MEM4));
}
/// Publish the cached DSP local to the `$dsp` global.
///
/// Must run before any call and at function exit so that callees (and the
/// host) observe the up-to-date data-stack pointer.
fn dsp_writeback(f: &mut Function) {
    f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL));
    f.instruction(&Instruction::GlobalSet(DSP));
}
/// Refresh the cached DSP local from the `$dsp` global.
///
/// Must run after any call, since the callee may have pushed or popped.
fn dsp_reload(f: &mut Function) {
    f.instruction(&Instruction::GlobalGet(DSP));
    f.instruction(&Instruction::LocalSet(CACHED_DSP_LOCAL));
}
/// Move a value from the WASM operand stack onto the return stack,
/// staging it in scratch local `tmp`. The return-stack pointer lives in
/// the `$rsp` global (it is not cached in a local).
fn rpush_via_local(f: &mut Function, tmp: u32) {
    let seq = [
        // Stash the value.
        Instruction::LocalSet(tmp),
        // rsp -= CELL_SIZE
        Instruction::GlobalGet(RSP),
        Instruction::I32Const(CELL_SIZE as i32),
        Instruction::I32Sub,
        Instruction::GlobalSet(RSP),
        // mem[rsp] = value
        Instruction::GlobalGet(RSP),
        Instruction::LocalGet(tmp),
        Instruction::I32Store(MEM4),
    ];
    for ins in &seq {
        f.instruction(ins);
    }
}
/// Pop the top return-stack cell onto the WASM operand stack:
/// load `mem[rsp]`, then `rsp += CELL_SIZE`.
fn rpop(f: &mut Function) {
    let seq = [
        Instruction::GlobalGet(RSP),
        Instruction::I32Load(MEM4),
        Instruction::GlobalGet(RSP),
        Instruction::I32Const(CELL_SIZE as i32),
        Instruction::I32Add,
        Instruction::GlobalSet(RSP),
    ];
    for ins in &seq {
        f.instruction(ins);
    }
}
/// Load the top return-stack cell without popping it.
fn rpeek(f: &mut Function) {
    f.instruction(&Instruction::GlobalGet(RSP));
    f.instruction(&Instruction::I32Load(MEM4));
}
/// Widen a WASM boolean (0/1 on the operand stack) into a Forth flag
/// (0 / -1), using `tmp` as a scratch local.
fn bool_to_forth_flag(f: &mut Function, tmp: u32) {
    // Forth truth is all-bits-set: `0 - b` maps 1 → -1 and 0 → 0.
    let seq = [
        Instruction::LocalSet(tmp),
        Instruction::I32Const(0),
        Instruction::LocalGet(tmp),
        Instruction::I32Sub,
    ];
    for ins in &seq {
        f.instruction(ins);
    }
}
// ---------------------------------------------------------------------------
// Float stack helpers
// ---------------------------------------------------------------------------
/// Carries f64 scratch local indices for float codegen.
///
/// The f64-typed locals cannot share indices with the i32 scratch locals
/// (WASM locals are mono-typed), so their indices are threaded through here.
struct EmitCtx {
    // First f64 scratch local (also used as the staging slot for pushes).
    f64_local_0: u32,
    // Second f64 scratch local (deeper operand in binary ops).
    f64_local_1: u32,
}
/// Emit `fsp -= 8`: allocate one f64 slot on the float stack.
fn fsp_dec(f: &mut Function) {
    let seq = [
        Instruction::GlobalGet(FSP),
        Instruction::I32Const(8),
        Instruction::I32Sub,
        Instruction::GlobalSet(FSP),
    ];
    for ins in &seq {
        f.instruction(ins);
    }
}
/// Emit `fsp += 8`: free one f64 slot on the float stack.
fn fsp_inc(f: &mut Function) {
    let seq = [
        Instruction::GlobalGet(FSP),
        Instruction::I32Const(8),
        Instruction::I32Add,
        Instruction::GlobalSet(FSP),
    ];
    for ins in &seq {
        f.instruction(ins);
    }
}
/// Move an f64 from the WASM operand stack onto the float stack:
/// stash in `tmp`, decrement FSP, store at the new [FSP].
fn fpush_via_local(f: &mut Function, tmp: u32) {
    f.instruction(&Instruction::LocalSet(tmp));
    fsp_dec(f);
    f.instruction(&Instruction::GlobalGet(FSP));
    f.instruction(&Instruction::LocalGet(tmp));
    f.instruction(&Instruction::F64Store(MEM8));
}
/// Push the f64 held in local `src` onto the float stack
/// (decrement FSP, then store `src` at the new [FSP]).
fn fpush_from_local(f: &mut Function, src: u32) {
    fsp_dec(f);
    f.instruction(&Instruction::GlobalGet(FSP));
    f.instruction(&Instruction::LocalGet(src));
    f.instruction(&Instruction::F64Store(MEM8));
}
/// Pop the top float-stack f64 onto the WASM operand stack:
/// load [FSP], then increment FSP.
fn fpop(f: &mut Function) {
    f.instruction(&Instruction::GlobalGet(FSP));
    f.instruction(&Instruction::F64Load(MEM8));
    fsp_inc(f);
}
/// Load the top float-stack f64 without popping it.
fn fpeek(f: &mut Function) {
    f.instruction(&Instruction::GlobalGet(FSP));
    f.instruction(&Instruction::F64Load(MEM8));
}
/// Pop two floats — b (top) then a — apply `wasm_op` as `a op b`, and
/// push the result back onto the float stack.
fn emit_float_binary(f: &mut Function, ctx: &EmitCtx, wasm_op: &Instruction<'_>) {
    // b (top of float stack) into scratch 0, a into scratch 1.
    fpop(f);
    f.instruction(&Instruction::LocalSet(ctx.f64_local_0));
    fpop(f);
    f.instruction(&Instruction::LocalSet(ctx.f64_local_1));
    // Evaluate `a op b` on the operand stack.
    f.instruction(&Instruction::LocalGet(ctx.f64_local_1));
    f.instruction(&Instruction::LocalGet(ctx.f64_local_0));
    f.instruction(wasm_op);
    fpush_via_local(f, ctx.f64_local_0);
}
/// Pop one float, apply unary op, push result.
fn emit_float_unary(f: &mut Function, ctx: &EmitCtx, wasm_op: &Instruction<'_>) {
fpop(f);
f.instruction(wasm_op);
fpush_via_local(f, ctx.f64_local_0);
}
/// Pop two floats, compare as `a cmp b`, and push the resulting Forth
/// flag (0 / -1) onto the *data* stack (cross-stack operation).
fn emit_float_cmp(f: &mut Function, ctx: &EmitCtx, wasm_cmp: &Instruction<'_>) {
    // b (top) into scratch 0, a into scratch 1.
    fpop(f);
    f.instruction(&Instruction::LocalSet(ctx.f64_local_0));
    fpop(f);
    f.instruction(&Instruction::LocalSet(ctx.f64_local_1));
    // `a cmp b` yields a WASM 0/1 boolean.
    f.instruction(&Instruction::LocalGet(ctx.f64_local_1));
    f.instruction(&Instruction::LocalGet(ctx.f64_local_0));
    f.instruction(wasm_cmp);
    // Widen to a Forth flag and push it on the data stack.
    bool_to_forth_flag(f, SCRATCH_BASE);
    push_via_local(f, SCRATCH_BASE + 1);
}
// ---------------------------------------------------------------------------
// IR emission
// ---------------------------------------------------------------------------
/// Emit every IR operation in `ops`, in order, into the function body `f`.
fn emit_body(f: &mut Function, ops: &[IrOp], ctx: &EmitCtx) {
    ops.iter().for_each(|op| emit_op(f, op, ctx));
}
/// Emit a single IR operation.
///
/// Stack-effect comments follow the Forth convention `( before -- after )`
/// with the top of stack on the right. Integer scratch uses WASM locals at
/// `SCRATCH_BASE..`; f64 scratch locals come from `ctx`. Any arm that calls
/// out of the function (direct, tail, or `EXECUTE`) must write the cached
/// DSP local back to the global first and reload it afterwards.
#[allow(clippy::too_many_lines)]
fn emit_op(f: &mut Function, op: &IrOp, ctx: &EmitCtx) {
    match op {
        // -- Literals -------------------------------------------------------
        IrOp::PushI32(n) => push_const(f, *n),
        // NOTE: double-cell literals are not lowered yet; emitting nothing
        // keeps the module valid but silently drops the value.
        IrOp::PushI64(_) => { /* TODO: double-cell */ }
        IrOp::PushF64(val) => {
            // Store the immediate directly at the new float-stack top;
            // no scratch local needed for a constant.
            fsp_dec(f);
            f.instruction(&Instruction::GlobalGet(FSP))
                .instruction(&Instruction::F64Const(*val))
                .instruction(&Instruction::F64Store(MEM8));
        }
        // -- Stack manipulation ---------------------------------------------
        IrOp::Drop => dsp_inc(f),
        IrOp::Dup => {
            peek(f);
            push_via_local(f, SCRATCH_BASE);
        }
        IrOp::Swap => {
            // ( a b -- b a )
            pop_to(f, SCRATCH_BASE); // b
            pop_to(f, SCRATCH_BASE + 1); // a
            f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
            push_via_local(f, SCRATCH_BASE + 2);
            f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1));
            push_via_local(f, SCRATCH_BASE + 2);
        }
        IrOp::Over => {
            // ( a b -- a b a ) : read second item at [dsp + CELL_SIZE]
            f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
                .instruction(&Instruction::I32Const(CELL_SIZE as i32))
                .instruction(&Instruction::I32Add)
                .instruction(&Instruction::I32Load(MEM4));
            push_via_local(f, SCRATCH_BASE);
        }
        IrOp::Rot => {
            // ( a b c -- b c a )
            pop_to(f, SCRATCH_BASE); // c
            pop_to(f, SCRATCH_BASE + 1); // b
            pop_to(f, SCRATCH_BASE + 2); // a
            f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1));
            push_via_local(f, SCRATCH_BASE + 3);
            f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
            push_via_local(f, SCRATCH_BASE + 3);
            f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 2));
            push_via_local(f, SCRATCH_BASE + 3);
        }
        IrOp::Nip => {
            // ( a b -- b )
            pop_to(f, SCRATCH_BASE); // b
            dsp_inc(f); // drop a
            f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
            push_via_local(f, SCRATCH_BASE + 1);
        }
        IrOp::Tuck => {
            // ( a b -- b a b )
            pop_to(f, SCRATCH_BASE); // b
            pop_to(f, SCRATCH_BASE + 1); // a
            f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
            push_via_local(f, SCRATCH_BASE + 2);
            f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1));
            push_via_local(f, SCRATCH_BASE + 2);
            f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
            push_via_local(f, SCRATCH_BASE + 2);
        }
        // -- Arithmetic -----------------------------------------------------
        IrOp::Add => emit_binary_commutative(f, &Instruction::I32Add),
        IrOp::Mul => emit_binary_commutative(f, &Instruction::I32Mul),
        IrOp::Sub => {
            // ( a b -- a-b )
            pop_to(f, SCRATCH_BASE); // b
            pop_to(f, SCRATCH_BASE + 1); // a
            f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
                .instruction(&Instruction::LocalGet(SCRATCH_BASE))
                .instruction(&Instruction::I32Sub);
            push_via_local(f, SCRATCH_BASE + 2);
        }
        IrOp::DivMod => {
            // ( n1 n2 -- rem quot ) — symmetric (truncating) division,
            // matching WASM i32.div_s / i32.rem_s semantics.
            pop_to(f, SCRATCH_BASE); // n2
            pop_to(f, SCRATCH_BASE + 1); // n1
            // Push remainder first (deeper)
            f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
                .instruction(&Instruction::LocalGet(SCRATCH_BASE))
                .instruction(&Instruction::I32RemS);
            push_via_local(f, SCRATCH_BASE + 2);
            // Push quotient on top
            f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
                .instruction(&Instruction::LocalGet(SCRATCH_BASE))
                .instruction(&Instruction::I32DivS);
            push_via_local(f, SCRATCH_BASE + 2);
        }
        IrOp::Negate => {
            pop_to(f, SCRATCH_BASE);
            // 0 - n (WASM has no i32.neg)
            f.instruction(&Instruction::I32Const(0))
                .instruction(&Instruction::LocalGet(SCRATCH_BASE))
                .instruction(&Instruction::I32Sub);
            push_via_local(f, SCRATCH_BASE + 1);
        }
        IrOp::Abs => {
            pop_to(f, SCRATCH_BASE);
            // if local < 0: local = 0 - local
            f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
                .instruction(&Instruction::I32Const(0))
                .instruction(&Instruction::I32LtS)
                .instruction(&Instruction::If(BlockType::Empty))
                .instruction(&Instruction::I32Const(0))
                .instruction(&Instruction::LocalGet(SCRATCH_BASE))
                .instruction(&Instruction::I32Sub)
                .instruction(&Instruction::LocalSet(SCRATCH_BASE))
                .instruction(&Instruction::End);
            f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
            push_via_local(f, SCRATCH_BASE + 1);
        }
        // -- Comparison -----------------------------------------------------
        IrOp::Eq => emit_cmp(f, &Instruction::I32Eq),
        IrOp::NotEq => emit_cmp(f, &Instruction::I32Ne),
        IrOp::Lt => emit_cmp(f, &Instruction::I32LtS),
        IrOp::Gt => emit_cmp(f, &Instruction::I32GtS),
        IrOp::LtUnsigned => emit_cmp(f, &Instruction::I32LtU),
        IrOp::ZeroEq => {
            pop(f);
            f.instruction(&Instruction::I32Eqz);
            bool_to_forth_flag(f, SCRATCH_BASE);
            push_via_local(f, SCRATCH_BASE + 1);
        }
        IrOp::ZeroLt => {
            pop(f);
            f.instruction(&Instruction::I32Const(0))
                .instruction(&Instruction::I32LtS);
            bool_to_forth_flag(f, SCRATCH_BASE);
            push_via_local(f, SCRATCH_BASE + 1);
        }
        // -- Logic ----------------------------------------------------------
        IrOp::And => emit_binary_commutative(f, &Instruction::I32And),
        IrOp::Or => emit_binary_commutative(f, &Instruction::I32Or),
        IrOp::Xor => emit_binary_commutative(f, &Instruction::I32Xor),
        IrOp::Invert => {
            // XOR with all-ones == bitwise NOT
            pop(f);
            f.instruction(&Instruction::I32Const(-1))
                .instruction(&Instruction::I32Xor);
            push_via_local(f, SCRATCH_BASE);
        }
        IrOp::Lshift => emit_binary_ordered(f, &Instruction::I32Shl),
        IrOp::Rshift => emit_binary_ordered(f, &Instruction::I32ShrU),
        IrOp::ArithRshift => emit_binary_ordered(f, &Instruction::I32ShrS),
        // -- Memory ---------------------------------------------------------
        IrOp::Fetch => {
            // ( addr -- value )
            pop(f);
            f.instruction(&Instruction::I32Load(MEM4));
            push_via_local(f, SCRATCH_BASE);
        }
        IrOp::Store => {
            // ( x addr -- )
            pop_to(f, SCRATCH_BASE); // addr
            pop_to(f, SCRATCH_BASE + 1); // x
            f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
                .instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
                .instruction(&Instruction::I32Store(MEM4));
        }
        IrOp::CFetch => {
            // ( addr -- char ) zero-extended byte load
            pop(f);
            f.instruction(&Instruction::I32Load8U(MEM1));
            push_via_local(f, SCRATCH_BASE);
        }
        IrOp::CStore => {
            pop_to(f, SCRATCH_BASE); // addr
            pop_to(f, SCRATCH_BASE + 1); // char
            f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
                .instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
                .instruction(&Instruction::I32Store8(MEM1));
        }
        IrOp::PlusStore => {
            // ( n addr -- ) : mem[addr] += n
            pop_to(f, SCRATCH_BASE); // addr
            pop_to(f, SCRATCH_BASE + 1); // n
            // addr is duplicated: once as the store address, once for the load.
            f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
                .instruction(&Instruction::LocalGet(SCRATCH_BASE))
                .instruction(&Instruction::I32Load(MEM4))
                .instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
                .instruction(&Instruction::I32Add)
                .instruction(&Instruction::I32Store(MEM4));
        }
        // -- Control flow ---------------------------------------------------
        IrOp::Call(word_id) => {
            // Write back cached DSP before call
            dsp_writeback(f);
            f.instruction(&Instruction::I32Const(word_id.0 as i32))
                .instruction(&Instruction::CallIndirect {
                    type_index: TYPE_VOID,
                    table_index: TABLE,
                });
            // Reload cached DSP after call (callee may have modified it)
            dsp_reload(f);
        }
        IrOp::TailCall(word_id) => {
            // Write back cached DSP before tail call
            dsp_writeback(f);
            f.instruction(&Instruction::I32Const(word_id.0 as i32))
                .instruction(&Instruction::CallIndirect {
                    type_index: TYPE_VOID,
                    table_index: TABLE,
                });
            // Callee's epilogue already wrote back to the global, so just return.
            // No reload needed since we're not using the local after this.
            f.instruction(&Instruction::Return);
        }
        IrOp::If {
            then_body,
            else_body,
        } => {
            // WASM `if` treats any nonzero i32 as true, so Forth's -1 flag
            // works directly.
            pop(f);
            f.instruction(&Instruction::If(BlockType::Empty));
            emit_body(f, then_body, ctx);
            if let Some(eb) = else_body {
                f.instruction(&Instruction::Else);
                emit_body(f, eb, ctx);
            }
            f.instruction(&Instruction::End);
        }
        IrOp::DoLoop { body, is_plus_loop } => {
            emit_do_loop(f, body, *is_plus_loop, ctx);
        }
        IrOp::BeginUntil { body } => {
            // Loop back while the popped flag is zero (UNTIL: exit on true).
            f.instruction(&Instruction::Loop(BlockType::Empty));
            emit_body(f, body, ctx);
            pop(f);
            f.instruction(&Instruction::I32Eqz)
                .instruction(&Instruction::BrIf(0))
                .instruction(&Instruction::End);
        }
        IrOp::BeginAgain { body } => {
            // Unconditional loop; only EXIT (or a trap) escapes.
            f.instruction(&Instruction::Loop(BlockType::Empty));
            emit_body(f, body, ctx);
            f.instruction(&Instruction::Br(0))
                .instruction(&Instruction::End);
        }
        IrOp::BeginWhileRepeat { test, body } => {
            f.instruction(&Instruction::Block(BlockType::Empty));
            f.instruction(&Instruction::Loop(BlockType::Empty));
            emit_body(f, test, ctx);
            pop(f);
            f.instruction(&Instruction::I32Eqz)
                .instruction(&Instruction::BrIf(1)); // break to outer block
            emit_body(f, body, ctx);
            f.instruction(&Instruction::Br(0)) // continue loop
                .instruction(&Instruction::End) // end loop
                .instruction(&Instruction::End); // end block
        }
        IrOp::BeginDoubleWhileRepeat {
            outer_test,
            inner_test,
            body,
            after_repeat,
            else_body,
        } => {
            // WASM structure:
            //   block $end            ;; THEN target
            //     block $else         ;; first WHILE false target
            //       block $after      ;; second WHILE false target
            //         loop $begin
            //           outer_test
            //           br_if(2) $else  ;; first WHILE: if false, skip to else
            //           inner_test
            //           br_if(1) $after ;; second WHILE: if false, skip to after
            //           body
            //           br(0)           ;; REPEAT: back to loop start
            //         end
            //       end
            //       after_repeat code
            //       br(1) $end        ;; skip else, goto end
            //     end
            //     else code
            //   end
            f.instruction(&Instruction::Block(BlockType::Empty)); // $end
            f.instruction(&Instruction::Block(BlockType::Empty)); // $else
            f.instruction(&Instruction::Block(BlockType::Empty)); // $after
            f.instruction(&Instruction::Loop(BlockType::Empty)); // $begin
            emit_body(f, outer_test, ctx);
            pop(f);
            f.instruction(&Instruction::I32Eqz)
                .instruction(&Instruction::BrIf(2)); // to $else
            emit_body(f, inner_test, ctx);
            pop(f);
            f.instruction(&Instruction::I32Eqz)
                .instruction(&Instruction::BrIf(1)); // to $after
            emit_body(f, body, ctx);
            f.instruction(&Instruction::Br(0)); // back to $begin
            f.instruction(&Instruction::End); // end loop
            f.instruction(&Instruction::End); // end $after block
            emit_body(f, after_repeat, ctx);
            if else_body.is_some() {
                f.instruction(&Instruction::Br(1)); // skip else, goto $end
            }
            f.instruction(&Instruction::End); // end $else block
            if let Some(eb) = else_body {
                emit_body(f, eb, ctx);
            }
            f.instruction(&Instruction::End); // end $end block
        }
        IrOp::Exit => {
            // Write back cached DSP before early return
            dsp_writeback(f);
            f.instruction(&Instruction::Return);
        }
        // -- Return stack ---------------------------------------------------
        IrOp::ToR => {
            pop(f);
            rpush_via_local(f, SCRATCH_BASE);
        }
        IrOp::FromR => {
            rpop(f);
            push_via_local(f, SCRATCH_BASE);
        }
        IrOp::RFetch => {
            rpeek(f);
            push_via_local(f, SCRATCH_BASE);
        }
        // -- I/O ------------------------------------------------------------
        IrOp::Emit => {
            pop(f);
            f.instruction(&Instruction::Call(EMIT_FUNC));
        }
        IrOp::Dot => {
            // MVP stub: pop and discard
            pop(f);
            f.instruction(&Instruction::Drop);
        }
        IrOp::Cr => {
            // Emit a newline (ASCII 10) through the host callback.
            f.instruction(&Instruction::I32Const(10))
                .instruction(&Instruction::Call(EMIT_FUNC));
        }
        IrOp::Type => {
            // MVP stub: drop both (c-addr u)
            pop(f);
            f.instruction(&Instruction::Drop);
            pop(f);
            f.instruction(&Instruction::Drop);
        }
        // -- System ---------------------------------------------------------
        IrOp::Execute => {
            // Pop the xt first; it stays on the operand stack beneath the
            // writeback instructions and feeds call_indirect.
            pop(f);
            // Write back cached DSP before indirect call
            dsp_writeback(f);
            f.instruction(&Instruction::CallIndirect {
                type_index: TYPE_VOID,
                table_index: TABLE,
            });
            // Reload cached DSP after call
            dsp_reload(f);
        }
        IrOp::SpFetch => {
            // Push the current cached DSP value onto the data stack.
            // Save DSP, decrement, then store the saved value at new TOS —
            // so the pushed value is the dsp as it was *before* this push.
            f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
                .instruction(&Instruction::LocalSet(SCRATCH_BASE));
            dsp_dec(f);
            f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
                .instruction(&Instruction::LocalGet(SCRATCH_BASE))
                .instruction(&Instruction::I32Store(MEM4));
        }
        // -- Compound operations -----------------------------------------------
        IrOp::TwoDup => {
            // ( a b -- a b a b )
            f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
                .instruction(&Instruction::I32Load(MEM4)); // b
            f.instruction(&Instruction::LocalSet(SCRATCH_BASE));
            f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
                .instruction(&Instruction::I32Const(CELL_SIZE as i32))
                .instruction(&Instruction::I32Add)
                .instruction(&Instruction::I32Load(MEM4)); // a
            f.instruction(&Instruction::LocalSet(SCRATCH_BASE + 1));
            // dsp -= 8
            f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
                .instruction(&Instruction::I32Const((CELL_SIZE * 2) as i32))
                .instruction(&Instruction::I32Sub)
                .instruction(&Instruction::LocalSet(CACHED_DSP_LOCAL));
            // store a at [dsp+4], b at [dsp]
            f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
                .instruction(&Instruction::I32Const(CELL_SIZE as i32))
                .instruction(&Instruction::I32Add)
                .instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
                .instruction(&Instruction::I32Store(MEM4));
            f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
                .instruction(&Instruction::LocalGet(SCRATCH_BASE))
                .instruction(&Instruction::I32Store(MEM4));
        }
        IrOp::TwoDrop => {
            // ( a b -- )
            f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
                .instruction(&Instruction::I32Const((CELL_SIZE * 2) as i32))
                .instruction(&Instruction::I32Add)
                .instruction(&Instruction::LocalSet(CACHED_DSP_LOCAL));
        }
        // -- Float stack ops -----------------------------------------------
        IrOp::FDrop => fsp_inc(f),
        IrOp::FDup => {
            fpeek(f);
            fpush_via_local(f, ctx.f64_local_0);
        }
        IrOp::FSwap => {
            // ( F: a b -- b a ): pop b then a, re-push b (deeper) then a.
            fpop(f);
            f.instruction(&Instruction::LocalSet(ctx.f64_local_0));
            fpop(f);
            f.instruction(&Instruction::LocalSet(ctx.f64_local_1));
            fpush_from_local(f, ctx.f64_local_0);
            fpush_from_local(f, ctx.f64_local_1);
        }
        IrOp::FOver => {
            // Read second float at [FSP + 8] and push a copy.
            f.instruction(&Instruction::GlobalGet(FSP))
                .instruction(&Instruction::I32Const(8))
                .instruction(&Instruction::I32Add)
                .instruction(&Instruction::F64Load(MEM8));
            fpush_via_local(f, ctx.f64_local_0);
        }
        // -- Float arithmetic ----------------------------------------------
        IrOp::FAdd => emit_float_binary(f, ctx, &Instruction::F64Add),
        IrOp::FSub => emit_float_binary(f, ctx, &Instruction::F64Sub),
        IrOp::FMul => emit_float_binary(f, ctx, &Instruction::F64Mul),
        IrOp::FDiv => emit_float_binary(f, ctx, &Instruction::F64Div),
        IrOp::FMin => emit_float_binary(f, ctx, &Instruction::F64Min),
        IrOp::FMax => emit_float_binary(f, ctx, &Instruction::F64Max),
        IrOp::FNegate => emit_float_unary(f, ctx, &Instruction::F64Neg),
        IrOp::FAbs => emit_float_unary(f, ctx, &Instruction::F64Abs),
        IrOp::FSqrt => emit_float_unary(f, ctx, &Instruction::F64Sqrt),
        IrOp::FFloor => emit_float_unary(f, ctx, &Instruction::F64Floor),
        IrOp::FRound => emit_float_unary(f, ctx, &Instruction::F64Nearest),
        // -- Float comparisons (cross-stack) --------------------------------
        IrOp::FZeroEq => {
            // ( F: r -- ) ( -- flag ) : r == 0.0
            fpop(f);
            f.instruction(&Instruction::F64Const(0.0))
                .instruction(&Instruction::F64Eq);
            bool_to_forth_flag(f, SCRATCH_BASE);
            push_via_local(f, SCRATCH_BASE + 1);
        }
        IrOp::FZeroLt => {
            // ( F: r -- ) ( -- flag ) : r < 0.0
            fpop(f);
            f.instruction(&Instruction::F64Const(0.0))
                .instruction(&Instruction::F64Lt);
            bool_to_forth_flag(f, SCRATCH_BASE);
            push_via_local(f, SCRATCH_BASE + 1);
        }
        IrOp::FEq => emit_float_cmp(f, ctx, &Instruction::F64Eq),
        IrOp::FLt => emit_float_cmp(f, ctx, &Instruction::F64Lt),
        // -- Float memory (cross-stack) ------------------------------------
        IrOp::FetchFloat => {
            // ( addr -- ) ( F: -- r )
            pop(f); // addr on operand stack
            f.instruction(&Instruction::F64Load(MEM8));
            fpush_via_local(f, ctx.f64_local_0);
        }
        IrOp::StoreFloat => {
            // ( addr -- ) ( F: r -- )
            pop_to(f, SCRATCH_BASE); // addr
            fpop(f);
            f.instruction(&Instruction::LocalSet(ctx.f64_local_0));
            f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
                .instruction(&Instruction::LocalGet(ctx.f64_local_0))
                .instruction(&Instruction::F64Store(MEM8));
        }
        // -- Float/integer conversions (cross-stack) -----------------------
        IrOp::StoF => {
            // ( n -- ) ( F: -- r )
            pop(f);
            f.instruction(&Instruction::F64ConvertI32S);
            fpush_via_local(f, ctx.f64_local_0);
        }
        IrOp::FtoS => {
            // ( F: r -- ) ( -- n ) — i32.trunc_f64_s traps on NaN/overflow
            fpop(f);
            f.instruction(&Instruction::I32TruncF64S);
            push_via_local(f, SCRATCH_BASE);
        }
    }
}
/// Binary operation where operand order does not matter (commutative).
/// Pops two from data stack, applies `op`, pushes result.
fn emit_binary_commutative(f: &mut Function, op: &Instruction<'_>) {
pop_to(f, SCRATCH_BASE); // second operand
pop_to(f, SCRATCH_BASE + 1); // first operand
f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
.instruction(&Instruction::LocalGet(SCRATCH_BASE))
.instruction(op);
push_via_local(f, SCRATCH_BASE + 2);
}
/// Order-sensitive binary operation: ( a b -- a OP b ).
/// Pops b (top) then a, evaluates `a op b`, pushes the result.
fn emit_binary_ordered(f: &mut Function, op: &Instruction<'_>) {
    pop_to(f, SCRATCH_BASE); // b (top of stack)
    pop_to(f, SCRATCH_BASE + 1); // a
    f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1));
    f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
    f.instruction(op);
    push_via_local(f, SCRATCH_BASE + 2);
}
/// Comparison: pop two cells, evaluate `a cmp b`, push the Forth flag
/// (-1 for true, 0 for false).
fn emit_cmp(f: &mut Function, cmp: &Instruction<'_>) {
    pop_to(f, SCRATCH_BASE); // b (top)
    pop_to(f, SCRATCH_BASE + 1); // a
    f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1));
    f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
    f.instruction(cmp);
    // Widen the WASM 0/1 result to a Forth 0/-1 flag.
    bool_to_forth_flag(f, SCRATCH_BASE + 2);
    push_via_local(f, SCRATCH_BASE + 3);
}
/// Emit a DO...LOOP / DO...+LOOP construct.
///
/// `DO` pops ( limit index -- ) from the data stack and pushes limit then
/// index onto the return stack. The body runs inside a
/// `block $exit / loop $continue` pair; after the body the index is popped,
/// stepped, and the termination test decides whether to branch back.
///
/// +LOOP uses the Forth-2012 "crossing" test: exit when
/// `(old_index - limit) XOR (new_index - limit)` has the sign bit set.
/// Since LEAVE sets index = limit, a step of 0 would never flip that sign,
/// so LEAVE also sets SYSVAR_LEAVE_FLAG, which is checked (and cleared)
/// here before the crossing test.
///
/// Every exit path must leave exactly two cells (index, limit) on the
/// return stack for the shared cleanup below, which pops both and clears
/// the LEAVE flag.
fn emit_do_loop(f: &mut Function, body: &[IrOp], is_plus_loop: bool, ctx: &EmitCtx) {
    // DO ( limit index -- )
    pop_to(f, SCRATCH_BASE); // index
    pop_to(f, SCRATCH_BASE + 1); // limit
    // Push limit then index to return stack
    f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1));
    rpush_via_local(f, SCRATCH_BASE + 2);
    f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
    rpush_via_local(f, SCRATCH_BASE + 2);
    // block $exit
    //   loop $continue
    //     <body>
    //     -- update index, check, branch
    //   end
    // end
    f.instruction(&Instruction::Block(BlockType::Empty));
    f.instruction(&Instruction::Loop(BlockType::Empty));
    emit_body(f, body, ctx);
    // Pop current index from return stack onto the operand stack
    rpop(f);
    if is_plus_loop {
        // +LOOP: Forth 2012 termination check.
        // Exit when (old_index - limit) XOR (new_index - limit) is negative,
        // or when the LEAVE flag is set (LEAVE sets index=limit, but +LOOP
        // with step=0 would loop forever without this flag check).
        f.instruction(&Instruction::LocalSet(SCRATCH_BASE)); // old index
        pop_to(f, SCRATCH_BASE + 2); // step from data stack
        // Check leave flag first — if set, clear it and exit immediately
        f.instruction(&Instruction::I32Const(SYSVAR_LEAVE_FLAG as i32))
            .instruction(&Instruction::I32Load(MEM4))
            .instruction(&Instruction::If(BlockType::Empty))
            .instruction(&Instruction::I32Const(SYSVAR_LEAVE_FLAG as i32))
            .instruction(&Instruction::I32Const(0))
            .instruction(&Instruction::I32Store(MEM4));
        // FIX: the index was already popped above, but the shared cleanup
        // after the block unconditionally pops TWO cells (index then limit).
        // Re-push the (dead) index before leaving so the return stack stays
        // balanced; previously this path left only `limit` on the return
        // stack and the second cleanup rpop underflowed past it, corrupting
        // the return-stack pointer.
        f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
        rpush_via_local(f, SCRATCH_BASE + 3);
        f.instruction(&Instruction::Br(2)) // exit: If(0) → Loop(1) → Block(2)
            .instruction(&Instruction::End);
        // Peek limit from return stack
        rpeek(f);
        f.instruction(&Instruction::LocalSet(SCRATCH_BASE + 1));
        // SCRATCH_BASE+3 = old_index - limit
        f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
            .instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
            .instruction(&Instruction::I32Sub)
            .instruction(&Instruction::LocalSet(SCRATCH_BASE + 3));
        // new_index = old_index + step
        f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
            .instruction(&Instruction::LocalGet(SCRATCH_BASE + 2))
            .instruction(&Instruction::I32Add)
            .instruction(&Instruction::LocalSet(SCRATCH_BASE));
        // Push updated index to return stack
        f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
        rpush_via_local(f, SCRATCH_BASE + 2);
        // (old_index - limit) XOR (new_index - limit); sign bit set => done
        f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 3)) // old - limit
            .instruction(&Instruction::LocalGet(SCRATCH_BASE)) // new_index
            .instruction(&Instruction::LocalGet(SCRATCH_BASE + 1)) // limit
            .instruction(&Instruction::I32Sub) // new - limit
            .instruction(&Instruction::I32Xor) // (old-limit) XOR (new-limit)
            .instruction(&Instruction::I32Const(0))
            .instruction(&Instruction::I32LtS) // < 0 means sign bit set
            .instruction(&Instruction::BrIf(1)) // break to $exit
            .instruction(&Instruction::Br(0)) // continue loop
            .instruction(&Instruction::End) // end loop
            .instruction(&Instruction::End); // end block
    } else {
        // LOOP: simple increment by 1. No flag check needed: LEAVE sets
        // index = limit, so `index + 1 >= limit` terminates immediately.
        f.instruction(&Instruction::I32Const(1))
            .instruction(&Instruction::I32Add)
            .instruction(&Instruction::LocalSet(SCRATCH_BASE));
        // Peek limit from return stack
        rpeek(f);
        f.instruction(&Instruction::LocalSet(SCRATCH_BASE + 1));
        // Push updated index back to return stack
        f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
        rpush_via_local(f, SCRATCH_BASE + 2);
        // if index >= limit, exit
        f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
            .instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
            .instruction(&Instruction::I32GeS)
            .instruction(&Instruction::BrIf(1)) // break to $exit
            .instruction(&Instruction::Br(0)) // continue loop
            .instruction(&Instruction::End) // end loop
            .instruction(&Instruction::End); // end block
    }
    // Clean up: pop index and limit from return stack, clear leave flag
    rpop(f);
    f.instruction(&Instruction::Drop);
    rpop(f);
    f.instruction(&Instruction::Drop);
    f.instruction(&Instruction::I32Const(SYSVAR_LEAVE_FLAG as i32))
        .instruction(&Instruction::I32Const(0))
        .instruction(&Instruction::I32Store(MEM4));
}
// ---------------------------------------------------------------------------
// Stack-to-local promotion
// ---------------------------------------------------------------------------
/// Check if a word body qualifies for stack-to-local promotion.
///
/// Phase 1: only straight-line code is eligible — no control flow, no calls,
/// no I/O, no return-stack traffic, and no 64-bit or float operations.
/// An empty body is not promotable.
fn is_promotable(ops: &[IrOp]) -> bool {
    !ops.is_empty()
        && ops.iter().all(|op| {
            // Reject anything outside the simple integer/stack/memory subset.
            !matches!(
                op,
                IrOp::Call(_)
                    | IrOp::TailCall(_)
                    | IrOp::Execute
                    | IrOp::SpFetch
                    | IrOp::If { .. }
                    | IrOp::DoLoop { .. }
                    | IrOp::BeginUntil { .. }
                    | IrOp::BeginAgain { .. }
                    | IrOp::BeginWhileRepeat { .. }
                    | IrOp::BeginDoubleWhileRepeat { .. }
                    | IrOp::Exit
                    | IrOp::ToR
                    | IrOp::FromR
                    | IrOp::RFetch
                    | IrOp::Emit
                    | IrOp::Dot
                    | IrOp::Cr
                    | IrOp::Type
                    | IrOp::PushI64(_)
                    | IrOp::PushF64(_)
                    | IrOp::FDup
                    | IrOp::FDrop
                    | IrOp::FSwap
                    | IrOp::FOver
                    | IrOp::FAdd
                    | IrOp::FSub
                    | IrOp::FMul
                    | IrOp::FDiv
                    | IrOp::FNegate
                    | IrOp::FAbs
                    | IrOp::FSqrt
                    | IrOp::FMin
                    | IrOp::FMax
                    | IrOp::FFloor
                    | IrOp::FRound
                    | IrOp::FZeroEq
                    | IrOp::FZeroLt
                    | IrOp::FEq
                    | IrOp::FLt
                    | IrOp::FetchFloat
                    | IrOp::StoreFloat
                    | IrOp::StoF
                    | IrOp::FtoS
            )
        })
}
/// Compute the net stack depth change for a single IR operation.
fn stack_delta(op: &IrOp) -> i32 {
match op {
IrOp::PushI32(_) | IrOp::Dup | IrOp::Over | IrOp::Tuck => 1,
IrOp::Drop | IrOp::Nip => -1,
IrOp::Swap | IrOp::Rot => 0,
IrOp::Add
| IrOp::Sub
| IrOp::Mul
| IrOp::And
| IrOp::Or
| IrOp::Xor
| IrOp::Lshift
| IrOp::Rshift
| IrOp::ArithRshift
| IrOp::Eq
| IrOp::NotEq
| IrOp::Lt
| IrOp::Gt
| IrOp::LtUnsigned => -1,
IrOp::DivMod => 0, // 2->2
IrOp::Negate | IrOp::Abs | IrOp::Invert | IrOp::ZeroEq | IrOp::ZeroLt => 0,
IrOp::Fetch | IrOp::CFetch => 0, // 1->1
IrOp::Store | IrOp::CStore | IrOp::PlusStore => -2,
IrOp::TwoDup => 2,
IrOp::TwoDrop => -2,
// Float-only ops: no data stack change
IrOp::PushF64(_)
| IrOp::FDup
| IrOp::FDrop
| IrOp::FSwap
| IrOp::FOver
| IrOp::FAdd
| IrOp::FSub
| IrOp::FMul
| IrOp::FDiv
| IrOp::FNegate
| IrOp::FAbs
| IrOp::FSqrt
| IrOp::FMin
| IrOp::FMax
| IrOp::FFloor
| IrOp::FRound => 0,
// Cross-stack: push to data stack
IrOp::FZeroEq | IrOp::FZeroLt | IrOp::FEq | IrOp::FLt | IrOp::FtoS => 1,
// Cross-stack: pop from data stack
IrOp::FetchFloat | IrOp::StoreFloat | IrOp::StoF => -1,
_ => 0,
}
}
/// Compute how many pre-existing stack items a word body needs.
///
/// Returns `(preload_count, net_depth_change)` where `preload_count` is the
/// number of items that must be loaded from the memory stack before execution.
///
/// The key insight: some ops READ existing stack positions without consuming
/// them (e.g., `Dup` reads the top). We must track the minimum stack position
/// that any op reads from, not just the net depth after consumption.
fn compute_stack_needs(ops: &[IrOp]) -> (u32, i32) {
let mut depth: i32 = 0;
let mut min_accessed: i32 = 0; // most negative position accessed
for op in ops {
// Determine the deepest position this op reads from relative to
// current depth. Position 0 = top of stack = depth-1 from base.
let reads_from = match op {
// These read the top without consuming:
IrOp::Dup => depth - 1,
// Reads top and second without consuming:
IrOp::Over => depth - 2,
IrOp::TwoDup => depth - 2,
// Reads/rearranges top 2:
IrOp::Swap | IrOp::Nip | IrOp::Tuck => depth - 2,
// Reads/rearranges top 3:
IrOp::Rot => depth - 3,
// Binary ops consume 2:
IrOp::Add
| IrOp::Sub
| IrOp::Mul
| IrOp::And
| IrOp::Or
| IrOp::Xor
| IrOp::Lshift
| IrOp::Rshift
| IrOp::ArithRshift
| IrOp::Eq
| IrOp::NotEq
| IrOp::Lt
| IrOp::Gt
| IrOp::LtUnsigned
| IrOp::DivMod
| IrOp::Store
| IrOp::CStore
| IrOp::PlusStore => depth - 2,
// Unary ops consume 1:
IrOp::Drop
| IrOp::Negate
| IrOp::Abs
| IrOp::Invert
| IrOp::ZeroEq
| IrOp::ZeroLt
| IrOp::Fetch
| IrOp::CFetch => depth - 1,
IrOp::TwoDrop => depth - 2,
// Cross-stack ops that pop from data stack
IrOp::FetchFloat | IrOp::StoreFloat | IrOp::StoF => depth - 1,
// Push ops and float-only ops don't read data stack items
_ => depth,
};
min_accessed = min_accessed.min(reads_from);
depth += stack_delta(op);
}
let preload = if min_accessed < 0 {
(-min_accessed) as u32
} else {
0
};
(preload, depth)
}
/// Count how many WASM locals the promoted code path needs (excluding cached
/// DSP and scratch locals). This is an upper bound -- we allocate a fresh
/// local for each value-producing operation.
fn count_promoted_locals(ops: &[IrOp], preload: u32) -> u32 {
let mut count = preload;
for op in ops {
match op {
IrOp::PushI32(_) => count += 1,
IrOp::Add
| IrOp::Sub
| IrOp::Mul
| IrOp::And
| IrOp::Or
| IrOp::Xor
| IrOp::Lshift
| IrOp::Rshift
| IrOp::ArithRshift
| IrOp::Eq
| IrOp::NotEq
| IrOp::Lt
| IrOp::Gt
| IrOp::LtUnsigned
| IrOp::Negate
| IrOp::Abs
| IrOp::Invert
| IrOp::ZeroEq
| IrOp::ZeroLt
| IrOp::Fetch
| IrOp::CFetch => count += 1,
IrOp::DivMod => count += 2,
IrOp::Dup | IrOp::Over | IrOp::Tuck | IrOp::TwoDup => {
// These reuse existing locals via the simulator, no extra needed
}
_ => {}
}
}
count
}
/// Stack simulator: maps each conceptual Forth stack slot to the WASM local
/// that currently holds its value.
struct StackSim {
    /// Conceptual stack: `stack[0]` = bottom, `stack.last()` = top.
    /// Each entry is a WASM local index.
    stack: Vec<u32>,
    /// Next available local index.
    next_local: u32,
}
impl StackSim {
    /// Create a simulator whose first allocated local will be `first_local`.
    fn new(first_local: u32) -> Self {
        Self {
            next_local: first_local,
            stack: Vec::new(),
        }
    }
    /// Hand out a fresh WASM local index.
    fn alloc(&mut self) -> u32 {
        let idx = self.next_local;
        self.next_local = idx + 1;
        idx
    }
    /// Push a local index onto the conceptual stack.
    fn push(&mut self, local: u32) {
        self.stack.push(local);
    }
    /// Pop the top local index from the conceptual stack.
    fn pop(&mut self) -> u32 {
        self.stack.pop().expect("promoted stack underflow")
    }
    /// Peek at the top of the conceptual stack.
    fn peek(&self) -> u32 {
        self.stack.last().copied().expect("promoted stack empty")
    }
    /// Peek at a position relative to the top (0 = top, 1 = second, etc.).
    fn peek_at(&self, from_top: usize) -> u32 {
        let idx = self.stack.len() - 1 - from_top;
        self.stack[idx]
    }
    /// Exchange the top two entries.
    fn swap(&mut self) {
        let n = self.stack.len();
        self.stack.swap(n - 2, n - 1);
    }
    /// ( a b c -- b c a ) : move the third-from-top entry to the top.
    fn rot(&mut self) {
        let third = self.stack.len() - 3;
        let moved = self.stack.remove(third);
        self.stack.push(moved);
    }
}
/// Emit the promoted prologue: load `preload` items from the memory stack
/// into WASM locals.
///
/// `mem[dsp]` holds the top of stack, `mem[dsp + 4]` the second, and so on.
/// We load top-first into fresh locals, then reverse the simulator's view so
/// its ordering matches (`stack[0]` = deepest loaded, last entry = top).
fn emit_promoted_prologue(f: &mut Function, preload: u32, sim: &mut StackSim) {
    for slot in 0..preload {
        let local = sim.alloc();
        f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL));
        if slot != 0 {
            f.instruction(&Instruction::I32Const((slot * CELL_SIZE) as i32))
                .instruction(&Instruction::I32Add);
        }
        f.instruction(&Instruction::I32Load(MEM4))
            .instruction(&Instruction::LocalSet(local));
        sim.push(local);
    }
    // Flip to bottom-first so stack[0] = deepest, stack[last] = top.
    sim.stack.reverse();
    // The preloaded cells now live in locals: bump the cached DSP past them.
    if preload != 0 {
        f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
            .instruction(&Instruction::I32Const((preload * CELL_SIZE) as i32))
            .instruction(&Instruction::I32Add)
            .instruction(&Instruction::LocalSet(CACHED_DSP_LOCAL));
    }
}
/// Emit the promoted epilogue: write remaining stack items back to memory.
///
/// Decrements the cached DSP to make room, then spills top-first: the
/// simulator's last entry goes to `[dsp]`, the one below it to `[dsp + 4]`,
/// and so on. Emits nothing when the simulated stack is empty.
fn emit_promoted_epilogue(f: &mut Function, sim: &mut StackSim) {
    let remaining = sim.stack.len() as u32;
    if remaining == 0 {
        return;
    }
    // Reserve space on the memory stack for everything still held in locals.
    f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
        .instruction(&Instruction::I32Const((remaining * CELL_SIZE) as i32))
        .instruction(&Instruction::I32Sub)
        .instruction(&Instruction::LocalSet(CACHED_DSP_LOCAL));
    // Walk the simulated stack from top (last entry) downwards.
    for (offset, &local) in sim.stack.iter().rev().enumerate() {
        f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL));
        if offset != 0 {
            f.instruction(&Instruction::I32Const((offset as u32 * CELL_SIZE) as i32))
                .instruction(&Instruction::I32Add);
        }
        f.instruction(&Instruction::LocalGet(local))
            .instruction(&Instruction::I32Store(MEM4));
    }
}
/// Emit a single promoted IR operation using WASM locals instead of memory.
///
/// Stack manipulation ops (Swap, Rot, Dup, Drop, Over, Nip, Tuck) emit zero
/// WASM instructions -- they just rearrange the simulator's local references.
/// Arithmetic and memory ops use `local.get` / `local.set` instead of
/// load/store through the data stack pointer.
///
/// Every value-producing arm stores its result into a *fresh* local from
/// `sim.alloc()` and never overwrites an existing one; that write-once
/// discipline is what makes the local aliasing in `Dup`, `Over`, `Tuck`,
/// and `TwoDup` safe.
fn emit_promoted_op(f: &mut Function, op: &IrOp, sim: &mut StackSim) {
    match op {
        // -- Literals --
        IrOp::PushI32(n) => {
            let local = sim.alloc();
            f.instruction(&Instruction::I32Const(*n));
            f.instruction(&Instruction::LocalSet(local));
            sim.push(local);
        }
        // -- Stack manipulation: zero WASM instructions! --
        IrOp::Drop => {
            sim.pop();
        }
        IrOp::Dup => {
            let top = sim.peek();
            sim.push(top); // same local, aliased
        }
        IrOp::Swap => {
            sim.swap();
        }
        IrOp::Over => {
            let second = sim.peek_at(1);
            sim.push(second);
        }
        IrOp::Rot => {
            sim.rot();
        }
        IrOp::Nip => {
            // ( a b -- b ) : remove second
            let top = sim.pop();
            sim.pop(); // discard second
            sim.push(top);
        }
        IrOp::Tuck => {
            // ( a b -- b a b ) : insert top below second
            let b = sim.pop();
            let a = sim.pop();
            sim.push(b);
            sim.push(a);
            sim.push(b); // aliased, same local
        }
        IrOp::TwoDup => {
            // ( a b -- a b a b ) : alias the top two locals
            let b = sim.peek_at(0);
            let a = sim.peek_at(1);
            sim.push(a);
            sim.push(b);
        }
        IrOp::TwoDrop => {
            sim.pop();
            sim.pop();
        }
        // -- Binary arithmetic (commutative) --
        IrOp::Add => emit_promoted_binary(f, sim, &Instruction::I32Add),
        IrOp::Mul => emit_promoted_binary(f, sim, &Instruction::I32Mul),
        IrOp::And => emit_promoted_binary(f, sim, &Instruction::I32And),
        IrOp::Or => emit_promoted_binary(f, sim, &Instruction::I32Or),
        IrOp::Xor => emit_promoted_binary(f, sim, &Instruction::I32Xor),
        // -- Binary arithmetic (ordered: a OP b) --
        IrOp::Sub => emit_promoted_binary_ordered(f, sim, &Instruction::I32Sub),
        IrOp::Lshift => emit_promoted_binary_ordered(f, sim, &Instruction::I32Shl),
        IrOp::Rshift => emit_promoted_binary_ordered(f, sim, &Instruction::I32ShrU),
        IrOp::ArithRshift => emit_promoted_binary_ordered(f, sim, &Instruction::I32ShrS),
        // -- Comparisons (result converted to a Forth flag: 0 / -1) --
        IrOp::Eq => emit_promoted_cmp(f, sim, &Instruction::I32Eq),
        IrOp::NotEq => emit_promoted_cmp(f, sim, &Instruction::I32Ne),
        IrOp::Lt => emit_promoted_cmp(f, sim, &Instruction::I32LtS),
        IrOp::Gt => emit_promoted_cmp(f, sim, &Instruction::I32GtS),
        IrOp::LtUnsigned => emit_promoted_cmp(f, sim, &Instruction::I32LtU),
        IrOp::ZeroEq => {
            let a = sim.pop();
            let result = sim.alloc();
            f.instruction(&Instruction::LocalGet(a));
            f.instruction(&Instruction::I32Eqz);
            // Convert WASM bool to Forth flag: 0 - result
            f.instruction(&Instruction::LocalSet(result));
            f.instruction(&Instruction::I32Const(0));
            f.instruction(&Instruction::LocalGet(result));
            f.instruction(&Instruction::I32Sub);
            f.instruction(&Instruction::LocalSet(result));
            sim.push(result);
        }
        IrOp::ZeroLt => {
            let a = sim.pop();
            let result = sim.alloc();
            f.instruction(&Instruction::LocalGet(a));
            f.instruction(&Instruction::I32Const(0));
            f.instruction(&Instruction::I32LtS);
            // Convert WASM bool to Forth flag
            f.instruction(&Instruction::LocalSet(result));
            f.instruction(&Instruction::I32Const(0));
            f.instruction(&Instruction::LocalGet(result));
            f.instruction(&Instruction::I32Sub);
            f.instruction(&Instruction::LocalSet(result));
            sim.push(result);
        }
        // -- Unary arithmetic --
        IrOp::Negate => {
            // Negate as 0 - a (WASM has no i32 negate instruction).
            let a = sim.pop();
            let result = sim.alloc();
            f.instruction(&Instruction::I32Const(0));
            f.instruction(&Instruction::LocalGet(a));
            f.instruction(&Instruction::I32Sub);
            f.instruction(&Instruction::LocalSet(result));
            sim.push(result);
        }
        IrOp::Abs => {
            let a = sim.pop();
            let result = sim.alloc();
            // Copy input to result, then negate if negative
            f.instruction(&Instruction::LocalGet(a));
            f.instruction(&Instruction::LocalSet(result));
            f.instruction(&Instruction::LocalGet(result));
            f.instruction(&Instruction::I32Const(0));
            f.instruction(&Instruction::I32LtS);
            f.instruction(&Instruction::If(BlockType::Empty));
            f.instruction(&Instruction::I32Const(0));
            f.instruction(&Instruction::LocalGet(result));
            f.instruction(&Instruction::I32Sub);
            f.instruction(&Instruction::LocalSet(result));
            f.instruction(&Instruction::End);
            sim.push(result);
        }
        IrOp::Invert => {
            // Bitwise NOT as XOR with -1 (all ones).
            let a = sim.pop();
            let result = sim.alloc();
            f.instruction(&Instruction::I32Const(-1));
            f.instruction(&Instruction::LocalGet(a));
            f.instruction(&Instruction::I32Xor);
            f.instruction(&Instruction::LocalSet(result));
            sim.push(result);
        }
        // -- DivMod: ( n1 n2 -- rem quot ) --
        IrOp::DivMod => {
            let n2 = sim.pop();
            let n1 = sim.pop();
            let rem_local = sim.alloc();
            let quot_local = sim.alloc();
            // remainder
            f.instruction(&Instruction::LocalGet(n1));
            f.instruction(&Instruction::LocalGet(n2));
            f.instruction(&Instruction::I32RemS);
            f.instruction(&Instruction::LocalSet(rem_local));
            // quotient
            f.instruction(&Instruction::LocalGet(n1));
            f.instruction(&Instruction::LocalGet(n2));
            f.instruction(&Instruction::I32DivS);
            f.instruction(&Instruction::LocalSet(quot_local));
            // Remainder is pushed first so the quotient ends up on top.
            sim.push(rem_local);
            sim.push(quot_local);
        }
        // -- Memory operations: these still access linear memory --
        IrOp::Fetch => {
            // ( addr -- x ) : 32-bit load
            let addr = sim.pop();
            let result = sim.alloc();
            f.instruction(&Instruction::LocalGet(addr));
            f.instruction(&Instruction::I32Load(MEM4));
            f.instruction(&Instruction::LocalSet(result));
            sim.push(result);
        }
        IrOp::CFetch => {
            // ( addr -- c ) : zero-extended byte load
            let addr = sim.pop();
            let result = sim.alloc();
            f.instruction(&Instruction::LocalGet(addr));
            f.instruction(&Instruction::I32Load8U(MEM1));
            f.instruction(&Instruction::LocalSet(result));
            sim.push(result);
        }
        IrOp::Store => {
            // ( x addr -- )
            let addr = sim.pop();
            let x = sim.pop();
            f.instruction(&Instruction::LocalGet(addr));
            f.instruction(&Instruction::LocalGet(x));
            f.instruction(&Instruction::I32Store(MEM4));
        }
        IrOp::CStore => {
            // ( c addr -- ) : stores the low byte of c
            let addr = sim.pop();
            let ch = sim.pop();
            f.instruction(&Instruction::LocalGet(addr));
            f.instruction(&Instruction::LocalGet(ch));
            f.instruction(&Instruction::I32Store8(MEM1));
        }
        IrOp::PlusStore => {
            // ( n addr -- ) : mem[addr] += n
            let addr = sim.pop();
            let n = sim.pop();
            f.instruction(&Instruction::LocalGet(addr));
            f.instruction(&Instruction::LocalGet(addr));
            f.instruction(&Instruction::I32Load(MEM4));
            f.instruction(&Instruction::LocalGet(n));
            f.instruction(&Instruction::I32Add);
            f.instruction(&Instruction::I32Store(MEM4));
        }
        // Ops rejected by `is_promotable` should never reach here. If one
        // does, it is silently skipped -- there is NO fallback to `emit_op`
        // on this path, so the filter in `is_promotable` must stay in sync
        // with the arms above.
        _ => {}
    }
}
/// Emit a promoted binary operation (commutative).
///
/// Pops the two operand locals, emits `lhs OP rhs` (stack order: the deeper
/// cell is the left operand), and stores the result in a fresh local that
/// becomes the new top of the simulated stack.
fn emit_promoted_binary(f: &mut Function, sim: &mut StackSim, op: &Instruction<'_>) {
    let rhs = sim.pop();
    let lhs = sim.pop();
    let out = sim.alloc();
    f.instruction(&Instruction::LocalGet(lhs))
        .instruction(&Instruction::LocalGet(rhs))
        .instruction(op)
        .instruction(&Instruction::LocalSet(out));
    sim.push(out);
}
/// Emit a promoted binary operation (ordered: a OP b).
fn emit_promoted_binary_ordered(f: &mut Function, sim: &mut StackSim, op: &Instruction<'_>) {
let b = sim.pop();
let a = sim.pop();
let result = sim.alloc();
f.instruction(&Instruction::LocalGet(a));
f.instruction(&Instruction::LocalGet(b));
f.instruction(op);
f.instruction(&Instruction::LocalSet(result));
sim.push(result);
}
/// Emit a promoted comparison operation (`a CMP b`, result is a Forth flag).
///
/// The WASM comparison leaves a bool (0/1); it is converted to the Forth
/// flag convention (0/-1) by computing `0 - bool` before pushing the result.
fn emit_promoted_cmp(f: &mut Function, sim: &mut StackSim, cmp: &Instruction<'_>) {
    let rhs = sim.pop();
    let lhs = sim.pop();
    let out = sim.alloc();
    f.instruction(&Instruction::LocalGet(lhs))
        .instruction(&Instruction::LocalGet(rhs))
        .instruction(cmp)
        .instruction(&Instruction::LocalSet(out))
        .instruction(&Instruction::I32Const(0))
        .instruction(&Instruction::LocalGet(out))
        .instruction(&Instruction::I32Sub)
        .instruction(&Instruction::LocalSet(out));
    sim.push(out);
}
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/// Check if an IR body (recursively) contains any float ops that need f64
/// locals, descending into every nested control-flow body.
fn needs_f64_locals(ops: &[IrOp]) -> bool {
    ops.iter().any(|op| match op {
        // Any direct float operation needs the f64 scratch locals.
        IrOp::PushF64(_)
        | IrOp::FDup
        | IrOp::FDrop
        | IrOp::FSwap
        | IrOp::FOver
        | IrOp::FAdd
        | IrOp::FSub
        | IrOp::FMul
        | IrOp::FDiv
        | IrOp::FNegate
        | IrOp::FAbs
        | IrOp::FSqrt
        | IrOp::FMin
        | IrOp::FMax
        | IrOp::FFloor
        | IrOp::FRound
        | IrOp::FZeroEq
        | IrOp::FZeroLt
        | IrOp::FEq
        | IrOp::FLt
        | IrOp::FetchFloat
        | IrOp::StoreFloat
        | IrOp::StoF
        | IrOp::FtoS => true,
        // Control flow: recurse into every nested body.
        IrOp::If {
            then_body,
            else_body,
        } => {
            needs_f64_locals(then_body) || else_body.as_deref().is_some_and(needs_f64_locals)
        }
        IrOp::DoLoop { body, .. } | IrOp::BeginUntil { body } | IrOp::BeginAgain { body } => {
            needs_f64_locals(body)
        }
        IrOp::BeginWhileRepeat { test, body } => {
            needs_f64_locals(test) || needs_f64_locals(body)
        }
        IrOp::BeginDoubleWhileRepeat {
            outer_test,
            inner_test,
            body,
            after_repeat,
            else_body,
        } => {
            needs_f64_locals(outer_test)
                || needs_f64_locals(inner_test)
                || needs_f64_locals(body)
                || needs_f64_locals(after_repeat)
                || else_body.as_deref().is_some_and(needs_f64_locals)
        }
        _ => false,
    })
}
/// Estimate scratch locals a function body needs (not counting cached DSP).
///
/// The baseline of 4 covers indices SCRATCH_BASE..SCRATCH_BASE+3; nested
/// control-flow bodies are scanned recursively and the maximum requirement
/// wins.
fn count_scratch_locals(ops: &[IrOp]) -> u32 {
    ops.iter().fold(4u32, |acc, op| {
        let needed = match op {
            // ROT/TUCK use the full baseline scratch set.
            IrOp::Rot | IrOp::Tuck => 4,
            IrOp::DoLoop { body, .. }
            | IrOp::BeginUntil { body }
            | IrOp::BeginAgain { body } => count_scratch_locals(body),
            IrOp::BeginWhileRepeat { test, body } => {
                count_scratch_locals(test).max(count_scratch_locals(body))
            }
            IrOp::BeginDoubleWhileRepeat {
                outer_test,
                inner_test,
                body,
                after_repeat,
                else_body,
            } => {
                let mut m = count_scratch_locals(outer_test)
                    .max(count_scratch_locals(inner_test))
                    .max(count_scratch_locals(body))
                    .max(count_scratch_locals(after_repeat));
                if let Some(eb) = else_body {
                    m = m.max(count_scratch_locals(eb));
                }
                m
            }
            IrOp::If {
                then_body,
                else_body,
            } => {
                let mut m = count_scratch_locals(then_body);
                if let Some(eb) = else_body {
                    m = m.max(count_scratch_locals(eb));
                }
                m
            }
            _ => 0,
        };
        acc.max(needed)
    })
}
/// Generate a complete WASM module for a single compiled word.
///
/// This is the JIT path: each word gets its own module that imports
/// shared memory, globals, and function table from the host.
///
/// Module shape:
/// - type 0 `() -> ()` (word bodies), type 1 `(i32) -> ()` (the `emit` host
///   function)
/// - imports: `env.emit`, `env.memory`, mutable stack-pointer globals
///   `dsp`/`rsp`/`fsp`, and the shared funcref `table`
/// - one local function, exported as `"fn"`, and installed into the shared
///   table at `config.base_fn_index` via an active element segment so other
///   words can reach it through `call_indirect`
///
/// The body is emitted either through the normal `emit_body` path or,
/// when `config.stack_to_local_promotion` is enabled and the body
/// qualifies (`is_promotable`), through the stack-to-local promotion path.
///
/// # Errors
///
/// Returns `WaferError::ValidationError` if the generated bytes fail
/// `wasmparser` validation (indicates a codegen bug, not a user error).
pub fn compile_word(
    _name: &str,
    body: &[IrOp],
    config: &CodegenConfig,
) -> WaferResult<CompiledModule> {
    let mut module = Module::new();
    // -- Type section --
    let mut types = TypeSection::new();
    types.ty().function([], []); // type 0: () -> ()
    types.ty().function([ValType::I32], []); // type 1: (i32) -> ()
    module.section(&types);
    // -- Import section --
    let mut imports = ImportSection::new();
    imports.import("env", "emit", EntityType::Function(TYPE_I32));
    imports.import(
        "env",
        "memory",
        EntityType::Memory(MemoryType {
            minimum: 1,
            maximum: None,
            memory64: false,
            shared: false,
            page_size_log2: None,
        }),
    );
    imports.import(
        "env",
        "dsp",
        EntityType::Global(GlobalType {
            val_type: ValType::I32,
            mutable: true,
            shared: false,
        }),
    );
    imports.import(
        "env",
        "rsp",
        EntityType::Global(GlobalType {
            val_type: ValType::I32,
            mutable: true,
            shared: false,
        }),
    );
    imports.import(
        "env",
        "fsp",
        EntityType::Global(GlobalType {
            val_type: ValType::I32,
            mutable: true,
            shared: false,
        }),
    );
    imports.import(
        "env",
        "table",
        EntityType::Table(TableType {
            element_type: RefType::FUNCREF,
            minimum: config.table_size as u64,
            maximum: None,
            table64: false,
            shared: false,
        }),
    );
    module.section(&imports);
    // -- Function section --
    let mut functions = FunctionSection::new();
    functions.function(TYPE_VOID);
    module.section(&functions);
    // -- Export section --
    let mut exports = ExportSection::new();
    exports.export("fn", ExportKind::Func, WORD_FUNC);
    module.section(&exports);
    // -- Element section --
    // Registers the word in the shared table so call_indirect can reach it.
    let mut elements = ElementSection::new();
    let offset = ConstExpr::i32_const(config.base_fn_index as i32);
    let indices = [WORD_FUNC];
    elements.active(
        Some(TABLE),
        &offset,
        Elements::Functions(Cow::Borrowed(&indices)),
    );
    module.section(&elements);
    // -- Code section --
    // Determine whether to use stack-to-local promotion
    let promoted = config.stack_to_local_promotion && is_promotable(body);
    let scratch_count = count_scratch_locals(body);
    let num_locals = if promoted {
        let (preload, _) = compute_stack_needs(body);
        let promoted_count = count_promoted_locals(body, preload);
        // 1 (cached DSP) + promoted locals (scratch locals not needed in promoted path)
        1 + promoted_count
    } else {
        1 + scratch_count
    };
    let has_floats = needs_f64_locals(body);
    let num_f64: u32 = if has_floats { 2 } else { 0 };
    let mut locals_decl = vec![(num_locals, ValType::I32)];
    if num_f64 > 0 {
        locals_decl.push((num_f64, ValType::F64));
    }
    let mut func = Function::new(locals_decl);
    // The f64 scratch locals sit after all i32 locals. These indices are
    // only meaningful when num_f64 > 0; bodies without float ops never
    // reference them.
    let ctx = EmitCtx {
        f64_local_0: num_locals,
        f64_local_1: num_locals + 1,
    };
    // Prologue: cache $dsp global into local 0
    func.instruction(&Instruction::GlobalGet(DSP))
        .instruction(&Instruction::LocalSet(CACHED_DSP_LOCAL));
    if promoted {
        let (preload, _) = compute_stack_needs(body);
        let first_promoted = SCRATCH_BASE; // promoted locals start right after cached_dsp
        let mut sim = StackSim::new(first_promoted);
        emit_promoted_prologue(&mut func, preload, &mut sim);
        for op in body {
            emit_promoted_op(&mut func, op, &mut sim);
        }
        emit_promoted_epilogue(&mut func, &mut sim);
    } else {
        emit_body(&mut func, body, &ctx);
    }
    // Epilogue: write cached DSP back to the $dsp global
    func.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
        .instruction(&Instruction::GlobalSet(DSP));
    func.instruction(&Instruction::End);
    let mut code = CodeSection::new();
    code.function(&func);
    module.section(&code);
    let bytes = module.finish();
    // Validate
    wasmparser::validate(&bytes).map_err(|e| {
        WaferError::ValidationError(format!("Generated WASM failed validation: {e}"))
    })?;
    Ok(CompiledModule {
        bytes,
        fn_index: config.base_fn_index,
    })
}
// ---------------------------------------------------------------------------
// Consolidated module generation
// ---------------------------------------------------------------------------
/// Emit all IR operations, replacing `Call`/`TailCall` with direct calls
/// when the target word is within the consolidated module.
fn emit_consolidated_body(
    f: &mut Function,
    ops: &[IrOp],
    local_fn_map: &HashMap<WordId, u32>,
    ctx: &EmitCtx,
) {
    ops.iter()
        .for_each(|op| emit_consolidated_op(f, op, local_fn_map, ctx));
}
/// Emit a single IR operation with consolidated call support.
///
/// For `Call` and `TailCall`, emits a direct `call` if the target is in the
/// consolidated module, otherwise falls back to `call_indirect`. For control
/// flow with nested bodies, recurses to handle inner calls.
///
/// Around every call (direct or indirect) the cached data-stack pointer is
/// synced: written back to the global before the call and re-read after,
/// because the callee keeps its own cached copy. Tail calls skip the reload
/// since the `return` that follows makes it dead.
fn emit_consolidated_op(
    f: &mut Function,
    op: &IrOp,
    local_fn_map: &HashMap<WordId, u32>,
    ctx: &EmitCtx,
) {
    match op {
        IrOp::Call(word_id) => {
            if let Some(&fn_idx) = local_fn_map.get(word_id) {
                // Target lives in this module: direct call.
                dsp_writeback(f);
                f.instruction(&Instruction::Call(fn_idx));
                dsp_reload(f);
            } else {
                // Fall back to indirect call for host functions
                dsp_writeback(f);
                f.instruction(&Instruction::I32Const(word_id.0 as i32))
                    .instruction(&Instruction::CallIndirect {
                        type_index: TYPE_VOID,
                        table_index: TABLE,
                    });
                dsp_reload(f);
            }
        }
        IrOp::TailCall(word_id) => {
            if let Some(&fn_idx) = local_fn_map.get(word_id) {
                dsp_writeback(f);
                f.instruction(&Instruction::Call(fn_idx));
                // No dsp_reload: we return immediately and the callee left
                // the global up to date.
                f.instruction(&Instruction::Return);
            } else {
                dsp_writeback(f);
                f.instruction(&Instruction::I32Const(word_id.0 as i32))
                    .instruction(&Instruction::CallIndirect {
                        type_index: TYPE_VOID,
                        table_index: TABLE,
                    });
                f.instruction(&Instruction::Return);
            }
        }
        // Control flow with nested bodies -- recurse for consolidated calls
        IrOp::If {
            then_body,
            else_body,
        } => {
            pop(f); // condition flag from the data stack
            f.instruction(&Instruction::If(BlockType::Empty));
            emit_consolidated_body(f, then_body, local_fn_map, ctx);
            if let Some(eb) = else_body {
                f.instruction(&Instruction::Else);
                emit_consolidated_body(f, eb, local_fn_map, ctx);
            }
            f.instruction(&Instruction::End);
        }
        IrOp::DoLoop { body, is_plus_loop } => {
            emit_consolidated_do_loop(f, body, *is_plus_loop, local_fn_map, ctx);
        }
        IrOp::BeginUntil { body } => {
            f.instruction(&Instruction::Loop(BlockType::Empty));
            emit_consolidated_body(f, body, local_fn_map, ctx);
            pop(f); // UNTIL flag: loop again while it is zero
            f.instruction(&Instruction::I32Eqz)
                .instruction(&Instruction::BrIf(0))
                .instruction(&Instruction::End);
        }
        IrOp::BeginAgain { body } => {
            // Unconditional loop; only EXIT or an inner branch leaves it.
            f.instruction(&Instruction::Loop(BlockType::Empty));
            emit_consolidated_body(f, body, local_fn_map, ctx);
            f.instruction(&Instruction::Br(0))
                .instruction(&Instruction::End);
        }
        IrOp::BeginWhileRepeat { test, body } => {
            f.instruction(&Instruction::Block(BlockType::Empty));
            f.instruction(&Instruction::Loop(BlockType::Empty));
            emit_consolidated_body(f, test, local_fn_map, ctx);
            pop(f); // WHILE flag
            f.instruction(&Instruction::I32Eqz)
                .instruction(&Instruction::BrIf(1)); // exit the block when the flag is zero
            emit_consolidated_body(f, body, local_fn_map, ctx);
            f.instruction(&Instruction::Br(0))
                .instruction(&Instruction::End)
                .instruction(&Instruction::End);
        }
        IrOp::BeginDoubleWhileRepeat {
            outer_test,
            inner_test,
            body,
            after_repeat,
            else_body,
        } => {
            f.instruction(&Instruction::Block(BlockType::Empty)); // $end
            f.instruction(&Instruction::Block(BlockType::Empty)); // $else
            f.instruction(&Instruction::Block(BlockType::Empty)); // $after
            f.instruction(&Instruction::Loop(BlockType::Empty)); // $begin
            emit_consolidated_body(f, outer_test, local_fn_map, ctx);
            pop(f);
            f.instruction(&Instruction::I32Eqz)
                .instruction(&Instruction::BrIf(2)); // to $else
            emit_consolidated_body(f, inner_test, local_fn_map, ctx);
            pop(f);
            f.instruction(&Instruction::I32Eqz)
                .instruction(&Instruction::BrIf(1)); // to $after
            emit_consolidated_body(f, body, local_fn_map, ctx);
            f.instruction(&Instruction::Br(0)); // back to $begin
            f.instruction(&Instruction::End); // end loop
            f.instruction(&Instruction::End); // end $after block
            emit_consolidated_body(f, after_repeat, local_fn_map, ctx);
            if else_body.is_some() {
                f.instruction(&Instruction::Br(1)); // skip else, goto $end
            }
            f.instruction(&Instruction::End); // end $else block
            if let Some(eb) = else_body {
                emit_consolidated_body(f, eb, local_fn_map, ctx);
            }
            f.instruction(&Instruction::End); // end $end block
        }
        // All other ops have no nested bodies with calls -- delegate to emit_op
        other => emit_op(f, other, ctx),
    }
}
/// Emit a DO...LOOP / DO...+LOOP with consolidated call support for the body.
///
/// DO has stack effect ( limit index -- ); both values are moved to the
/// return stack for the loop's duration (limit below index). LOOP increments
/// the index by 1 and exits when index >= limit. +LOOP pops the step from
/// the data stack and terminates via the sign-crossing test
/// `((old_index - limit) XOR (new_index - limit)) < 0`, which also handles
/// negative steps.
///
/// LEAVE protocol: a LEAVE inside the body sets SYSVAR_LEAVE_FLAG. The
/// +LOOP path checks and clears it *before* the crossing test, so LEAVE
/// still exits when step = 0 (where the XOR test alone would never fire).
/// Both loop flavors clear the flag after the loop parameters are popped.
fn emit_consolidated_do_loop(
    f: &mut Function,
    body: &[IrOp],
    is_plus_loop: bool,
    local_fn_map: &HashMap<WordId, u32>,
    ctx: &EmitCtx,
) {
    // DO ( limit index -- )
    pop_to(f, SCRATCH_BASE); // index
    pop_to(f, SCRATCH_BASE + 1); // limit
    // Push limit then index to return stack
    f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1));
    rpush_via_local(f, SCRATCH_BASE + 2);
    f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
    rpush_via_local(f, SCRATCH_BASE + 2);
    f.instruction(&Instruction::Block(BlockType::Empty));
    f.instruction(&Instruction::Loop(BlockType::Empty));
    emit_consolidated_body(f, body, local_fn_map, ctx);
    // Pop current index from return stack into scratch local
    rpop(f);
    if is_plus_loop {
        f.instruction(&Instruction::LocalSet(SCRATCH_BASE)); // old index
        pop_to(f, SCRATCH_BASE + 2); // step from data stack
        // Check leave flag — if set, clear it and exit immediately
        f.instruction(&Instruction::I32Const(SYSVAR_LEAVE_FLAG as i32))
            .instruction(&Instruction::I32Load(MEM4))
            .instruction(&Instruction::If(BlockType::Empty))
            .instruction(&Instruction::I32Const(SYSVAR_LEAVE_FLAG as i32))
            .instruction(&Instruction::I32Const(0))
            .instruction(&Instruction::I32Store(MEM4))
            .instruction(&Instruction::Br(2)) // exit: If(0) → Loop(1) → Block(2)
            .instruction(&Instruction::End);
        // Peek limit from return stack (it stays there for the next pass)
        rpeek(f);
        f.instruction(&Instruction::LocalSet(SCRATCH_BASE + 1));
        // SCRATCH_BASE+3 = old_index - limit (sign snapshot before stepping)
        f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
            .instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
            .instruction(&Instruction::I32Sub)
            .instruction(&Instruction::LocalSet(SCRATCH_BASE + 3));
        // new_index = old_index + step
        f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
            .instruction(&Instruction::LocalGet(SCRATCH_BASE + 2))
            .instruction(&Instruction::I32Add)
            .instruction(&Instruction::LocalSet(SCRATCH_BASE));
        // Push updated index back onto the return stack
        f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
        rpush_via_local(f, SCRATCH_BASE + 2);
        // Exit when (old-limit) XOR (new-limit) has the sign bit set, i.e.
        // the index crossed the limit boundary this iteration.
        f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 3)) // old - limit
            .instruction(&Instruction::LocalGet(SCRATCH_BASE)) // new_index
            .instruction(&Instruction::LocalGet(SCRATCH_BASE + 1)) // limit
            .instruction(&Instruction::I32Sub) // new - limit
            .instruction(&Instruction::I32Xor)
            .instruction(&Instruction::I32Const(0))
            .instruction(&Instruction::I32LtS) // < 0 means sign bit set
            .instruction(&Instruction::BrIf(1)) // break to $exit
            .instruction(&Instruction::Br(0)) // continue loop
            .instruction(&Instruction::End) // end loop
            .instruction(&Instruction::End); // end block
    } else {
        // LOOP: simple increment by 1
        f.instruction(&Instruction::I32Const(1))
            .instruction(&Instruction::I32Add)
            .instruction(&Instruction::LocalSet(SCRATCH_BASE));
        // Peek limit from return stack
        rpeek(f);
        f.instruction(&Instruction::LocalSet(SCRATCH_BASE + 1));
        // Push updated index back to return stack
        f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
        rpush_via_local(f, SCRATCH_BASE + 2);
        // if index >= limit, exit
        f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
            .instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
            .instruction(&Instruction::I32GeS)
            .instruction(&Instruction::BrIf(1)) // break to $exit
            .instruction(&Instruction::Br(0)) // continue loop
            .instruction(&Instruction::End) // end loop
            .instruction(&Instruction::End); // end block
    }
    // Clean up: pop index and limit from return stack, clear leave flag
    rpop(f);
    f.instruction(&Instruction::Drop);
    rpop(f);
    f.instruction(&Instruction::Drop);
    f.instruction(&Instruction::I32Const(SYSVAR_LEAVE_FLAG as i32))
        .instruction(&Instruction::I32Const(0))
        .instruction(&Instruction::I32Store(MEM4));
}
/// Optional extras for exportable modules (data section, entry point, metadata).
///
/// Consumed by [`compile_exportable_module`]; plain consolidated modules
/// carry none of these.
pub struct ExportSections<'a> {
    /// Memory snapshot to embed as a WASM data section.
    /// An empty slice means no data section is emitted.
    pub memory_snapshot: &'a [u8],
    /// If set, export this function index as `_start`.
    pub entry_fn_index: Option<u32>,
    /// JSON metadata to embed as a custom "wafer" section.
    pub metadata_json: &'a [u8],
}
/// Compile multiple IR-based words into a single WASM module with direct calls.
///
/// Used at runtime by `CONSOLIDATE` and during startup batch compilation.
///
/// `local_fn_map` maps each `WordId` to its function index inside the new
/// module; calls to words in the map become direct `call` instructions,
/// everything else stays a `call_indirect` through the shared table.
pub fn compile_consolidated_module(
    words: &[(WordId, Vec<IrOp>)],
    local_fn_map: &HashMap<WordId, u32>,
    table_size: u32,
) -> WaferResult<Vec<u8>> {
    // Thin wrapper: `None` selects the plain (non-exportable) module layout.
    compile_multi_word_module(words, local_fn_map, table_size, None)
}
/// Compile an exportable WASM module with embedded memory and metadata.
///
/// Same as [`compile_consolidated_module`] but adds a WASM data section
/// (memory snapshot), an optional `_start` entry point export, and a
/// custom "wafer" section with JSON metadata, all taken from `export`.
pub fn compile_exportable_module(
    words: &[(WordId, Vec<IrOp>)],
    local_fn_map: &HashMap<WordId, u32>,
    table_size: u32,
    export: &ExportSections<'_>,
) -> WaferResult<Vec<u8>> {
    // Thin wrapper: `Some(export)` turns on the extra export sections.
    compile_multi_word_module(words, local_fn_map, table_size, Some(export))
}
/// Internal: build a multi-word WASM module. When `export` is `Some`, adds
/// data section, entry-point export, and custom metadata section.
///
/// Sections are emitted in the order the WASM binary format requires:
/// Type, Import, Function, Export, Element, DataCount (only when a Data
/// section follows), Code, Data, Custom. The finished bytes are validated
/// with `wasmparser` before being returned.
fn compile_multi_word_module(
    words: &[(WordId, Vec<IrOp>)],
    local_fn_map: &HashMap<WordId, u32>,
    table_size: u32,
    export: Option<&ExportSections<'_>>,
) -> WaferResult<Vec<u8>> {
    // Whether a Data section will be emitted; also gates the DataCount section.
    let has_data = export.is_some_and(|e| !e.memory_snapshot.is_empty());
    let mut module = Module::new();
    // -- Type section --
    let mut types = TypeSection::new();
    types.ty().function([], []); // type 0: () -> ()
    types.ty().function([ValType::I32], []); // type 1: (i32) -> ()
    module.section(&types);
    // -- Import section (same as single-word modules) --
    // Import order matters: instantiation supplies externals positionally as
    // emit, memory, dsp, rsp, fsp, table (see the wasmtime tests).
    let mut imports = ImportSection::new();
    imports.import("env", "emit", EntityType::Function(TYPE_I32));
    imports.import(
        "env",
        "memory",
        EntityType::Memory(MemoryType {
            minimum: 1,
            maximum: None,
            memory64: false,
            shared: false,
            page_size_log2: None,
        }),
    );
    imports.import(
        "env",
        "dsp",
        EntityType::Global(GlobalType {
            val_type: ValType::I32,
            mutable: true,
            shared: false,
        }),
    );
    imports.import(
        "env",
        "rsp",
        EntityType::Global(GlobalType {
            val_type: ValType::I32,
            mutable: true,
            shared: false,
        }),
    );
    imports.import(
        "env",
        "fsp",
        EntityType::Global(GlobalType {
            val_type: ValType::I32,
            mutable: true,
            shared: false,
        }),
    );
    imports.import(
        "env",
        "table",
        EntityType::Table(TableType {
            element_type: RefType::FUNCREF,
            minimum: table_size as u64,
            maximum: None,
            table64: false,
            shared: false,
        }),
    );
    module.section(&imports);
    // -- Function section: N functions, all type void --
    let mut functions = FunctionSection::new();
    for _ in words {
        functions.function(TYPE_VOID);
    }
    module.section(&functions);
    // -- Export section: export each function as "fn_0", "fn_1", etc. --
    let mut exports = ExportSection::new();
    for (i, _) in words.iter().enumerate() {
        let name = format!("fn_{i}");
        // +1 because emit is imported function index 0
        exports.export(&name, ExportKind::Func, (i as u32) + 1);
    }
    // Optionally export an entry point as "_start"
    // NOTE(review): fn_idx is exported as-is, without the +1 import offset
    // applied above — confirm callers already pass a module-local index.
    if let Some(e) = export
        && let Some(fn_idx) = e.entry_fn_index
    {
        exports.export("_start", ExportKind::Func, fn_idx);
    }
    module.section(&exports);
    // -- Element section: place each function in the table at its WordId slot --
    // One active segment per word; WordId slots need not be contiguous.
    let mut elements = ElementSection::new();
    for (i, (word_id, _)) in words.iter().enumerate() {
        let offset = ConstExpr::i32_const(word_id.0 as i32);
        let fn_idx = (i as u32) + 1; // +1 for the emit import
        let indices = [fn_idx];
        elements.active(
            Some(TABLE),
            &offset,
            Elements::Functions(Cow::Borrowed(&indices)),
        );
    }
    module.section(&elements);
    // -- DataCount section (required before Code when Data section is present) --
    if has_data {
        module.section(&DataCountSection { count: 1 });
    }
    // -- Code section: emit each function body --
    let mut code = CodeSection::new();
    for (_word_id, body) in words {
        // Local 0 is the cached DSP; scratch i32 locals follow it.
        let num_locals = 1 + count_scratch_locals(body);
        let has_floats = needs_f64_locals(body);
        let num_f64: u32 = if has_floats { 2 } else { 0 };
        let mut locals_decl = vec![(num_locals, ValType::I32)];
        if num_f64 > 0 {
            locals_decl.push((num_f64, ValType::F64));
        }
        let mut func = Function::new(locals_decl);
        // f64 scratch locals are declared after all i32 locals, so their
        // indices start at num_locals.
        let ctx = EmitCtx {
            f64_local_0: num_locals,
            f64_local_1: num_locals + 1,
        };
        // Prologue: cache $dsp global into local 0
        func.instruction(&Instruction::GlobalGet(DSP))
            .instruction(&Instruction::LocalSet(CACHED_DSP_LOCAL));
        // Body with consolidated call support
        emit_consolidated_body(&mut func, body, local_fn_map, &ctx);
        // Epilogue: write cached DSP back to the $dsp global
        func.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
            .instruction(&Instruction::GlobalSet(DSP));
        func.instruction(&Instruction::End);
        code.function(&func);
    }
    module.section(&code);
    // -- Data section (memory snapshot for exportable modules) --
    if let Some(e) = export
        && !e.memory_snapshot.is_empty()
    {
        let mut data = DataSection::new();
        data.active(
            MEMORY_INDEX,
            &ConstExpr::i32_const(0),
            e.memory_snapshot.iter().copied(),
        );
        module.section(&data);
    }
    // -- Custom "wafer" section (metadata for exportable modules) --
    if let Some(e) = export
        && !e.metadata_json.is_empty()
    {
        module.section(&CustomSection {
            name: Cow::Borrowed("wafer"),
            data: Cow::Borrowed(e.metadata_json),
        });
    }
    let bytes = module.finish();
    // Validate before handing bytes to the caller; a malformed module here is
    // a codegen bug, surfaced as a WaferError rather than a downstream trap.
    wasmparser::validate(&bytes)
        .map_err(|e| WaferError::ValidationError(format!("WASM module failed validation: {e}")))?;
    Ok(bytes)
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
use crate::dictionary::WordId;
use crate::ir::IrOp;
use crate::memory::{DATA_STACK_TOP, FLOAT_STACK_TOP, RETURN_STACK_TOP};
    /// Baseline codegen configuration shared by every test: function index 0,
    /// a 16-slot call table, and stack-to-local promotion enabled.
    fn default_config() -> CodegenConfig {
        CodegenConfig {
            base_fn_index: 0,
            table_size: 16,
            stack_to_local_promotion: true,
        }
    }
fn validate_wasm(bytes: &[u8]) -> Result<(), String> {
wasmparser::validate(bytes)
.map(|_| ())
.map_err(|e| e.to_string())
}
// ===================================================================
// Validation-only tests
// ===================================================================
#[test]
fn compile_push_i32_validates() {
let m = compile_word("test", &[IrOp::PushI32(42)], &default_config()).unwrap();
validate_wasm(&m.bytes).unwrap();
}
#[test]
fn compile_arithmetic_validates() {
let ops = vec![IrOp::PushI32(3), IrOp::PushI32(4), IrOp::Add];
let m = compile_word("add_test", &ops, &default_config()).unwrap();
validate_wasm(&m.bytes).unwrap();
}
#[test]
fn compile_if_else_validates() {
let ops = vec![
IrOp::PushI32(1),
IrOp::If {
then_body: vec![IrOp::PushI32(42)],
else_body: Some(vec![IrOp::PushI32(0)]),
},
];
let m = compile_word("if_test", &ops, &default_config()).unwrap();
validate_wasm(&m.bytes).unwrap();
}
#[test]
fn compile_call_validates() {
let ops = vec![IrOp::Call(WordId(5))];
let m = compile_word("call_test", &ops, &default_config()).unwrap();
validate_wasm(&m.bytes).unwrap();
}
#[test]
fn compile_stack_ops_validates() {
let ops = vec![
IrOp::PushI32(1),
IrOp::PushI32(2),
IrOp::Dup,
IrOp::Swap,
IrOp::Over,
IrOp::Rot,
IrOp::Drop,
IrOp::Drop,
IrOp::Drop,
];
let m = compile_word("stack_ops", &ops, &default_config()).unwrap();
validate_wasm(&m.bytes).unwrap();
}
#[test]
fn compile_comparisons_validate() {
for op in [IrOp::Eq, IrOp::NotEq, IrOp::Lt, IrOp::Gt, IrOp::LtUnsigned] {
let ops = vec![IrOp::PushI32(3), IrOp::PushI32(4), op];
compile_word("cmp", &ops, &default_config()).unwrap();
}
for op in [IrOp::ZeroEq, IrOp::ZeroLt] {
let ops = vec![IrOp::PushI32(0), op];
compile_word("zcmp", &ops, &default_config()).unwrap();
}
}
#[test]
fn compile_logic_ops_validates() {
let ops = vec![
IrOp::PushI32(0xFF),
IrOp::PushI32(0x0F),
IrOp::And,
IrOp::PushI32(0xF0),
IrOp::Or,
IrOp::Invert,
];
compile_word("logic", &ops, &default_config()).unwrap();
}
#[test]
fn compile_memory_ops_validates() {
let ops = vec![
IrOp::PushI32(42),
IrOp::PushI32(0x100),
IrOp::Store,
IrOp::PushI32(0x100),
IrOp::Fetch,
];
compile_word("mem", &ops, &default_config()).unwrap();
}
#[test]
fn compile_begin_until_validates() {
let ops = vec![
IrOp::PushI32(5),
IrOp::BeginUntil {
body: vec![IrOp::PushI32(1), IrOp::Sub, IrOp::Dup, IrOp::ZeroEq],
},
];
compile_word("bu", &ops, &default_config()).unwrap();
}
#[test]
fn compile_begin_while_repeat_validates() {
let ops = vec![
IrOp::PushI32(3),
IrOp::BeginWhileRepeat {
test: vec![IrOp::Dup],
body: vec![IrOp::PushI32(1), IrOp::Sub],
},
];
compile_word("bwr", &ops, &default_config()).unwrap();
}
#[test]
fn compile_return_stack_validates() {
let ops = vec![IrOp::PushI32(42), IrOp::ToR, IrOp::RFetch, IrOp::FromR];
compile_word("rs", &ops, &default_config()).unwrap();
}
#[test]
fn compile_shift_ops_validates() {
let ops = vec![
IrOp::PushI32(1),
IrOp::PushI32(4),
IrOp::Lshift,
IrOp::PushI32(2),
IrOp::Rshift,
];
compile_word("shift", &ops, &default_config()).unwrap();
}
#[test]
fn compile_emit_validates() {
compile_word("emit", &[IrOp::PushI32(65), IrOp::Emit], &default_config()).unwrap();
}
#[test]
fn compile_cr_validates() {
compile_word("cr", &[IrOp::Cr], &default_config()).unwrap();
}
#[test]
fn compile_exit_validates() {
compile_word("exit", &[IrOp::PushI32(1), IrOp::Exit], &default_config()).unwrap();
}
#[test]
fn compile_nip_tuck_validates() {
let ops = vec![
IrOp::PushI32(1),
IrOp::PushI32(2),
IrOp::Nip,
IrOp::PushI32(3),
IrOp::Tuck,
];
compile_word("nt", &ops, &default_config()).unwrap();
}
#[test]
fn compile_divmod_validates() {
compile_word(
"dm",
&[IrOp::PushI32(10), IrOp::PushI32(3), IrOp::DivMod],
&default_config(),
)
.unwrap();
}
#[test]
fn compile_negate_abs_validates() {
compile_word(
"na",
&[IrOp::PushI32(-5), IrOp::Abs, IrOp::Negate],
&default_config(),
)
.unwrap();
}
#[test]
fn compile_empty_body_validates() {
compile_word("noop", &[], &default_config()).unwrap();
}
#[test]
fn compile_cfetch_cstore_validates() {
let ops = vec![
IrOp::PushI32(65),
IrOp::PushI32(0x200),
IrOp::CStore,
IrOp::PushI32(0x200),
IrOp::CFetch,
];
compile_word("byte", &ops, &default_config()).unwrap();
}
#[test]
fn compile_plus_store_validates() {
let ops = vec![
IrOp::PushI32(10),
IrOp::PushI32(0x100),
IrOp::Store,
IrOp::PushI32(5),
IrOp::PushI32(0x100),
IrOp::PlusStore,
];
compile_word("ps", &ops, &default_config()).unwrap();
}
#[test]
fn compiled_module_fn_index() {
let cfg = CodegenConfig {
base_fn_index: 7,
table_size: 16,
stack_to_local_promotion: true,
};
let m = compile_word("t", &[IrOp::PushI32(1)], &cfg).unwrap();
assert_eq!(m.fn_index, 7);
}
    // ===================================================================
    // Wasmtime execution tests
    // ===================================================================
    /// Run a compiled word via wasmtime and return the data stack (top first).
    ///
    /// Instantiates the module with fresh memory, stack-pointer globals
    /// initialized to each stack's top (stacks grow downward), an empty
    /// funcref table, and a no-op `emit` host function. The externals must be
    /// supplied in the module's import order: emit, memory, dsp, rsp, fsp,
    /// table.
    fn run_word(ops: &[IrOp]) -> Vec<i32> {
        use wasmtime::*;
        let compiled = compile_word("test", ops, &default_config()).unwrap();
        let engine = Engine::default();
        let mut store = Store::new(&engine, ());
        let memory = Memory::new(&mut store, MemoryType::new(16, None)).unwrap();
        let dsp = Global::new(
            &mut store,
            wasmtime::GlobalType::new(ValType::I32, Mutability::Var),
            Val::I32(DATA_STACK_TOP as i32),
        )
        .unwrap();
        let rsp = Global::new(
            &mut store,
            wasmtime::GlobalType::new(ValType::I32, Mutability::Var),
            Val::I32(RETURN_STACK_TOP as i32),
        )
        .unwrap();
        let fsp = Global::new(
            &mut store,
            wasmtime::GlobalType::new(ValType::I32, Mutability::Var),
            Val::I32(FLOAT_STACK_TOP as i32),
        )
        .unwrap();
        let table = Table::new(
            &mut store,
            wasmtime::TableType::new(RefType::FUNCREF, 16, None),
            Ref::Func(None),
        )
        .unwrap();
        let emit_ty = FuncType::new(&engine, [ValType::I32], []);
        let emit = Func::new(&mut store, emit_ty, |_caller, _params, _results| Ok(()));
        let module = wasmtime::Module::new(&engine, &compiled.bytes).unwrap();
        let instance = Instance::new(
            &mut store,
            &module,
            &[
                emit.into(),
                memory.into(),
                dsp.into(),
                rsp.into(),
                fsp.into(),
                table.into(),
            ],
        )
        .unwrap();
        instance
            .get_func(&mut store, "fn")
            .unwrap()
            .call(&mut store, &[], &mut [])
            .unwrap();
        // Read data stack: walk from the final stack pointer up to the stack
        // top in 4-byte cells, so the first element pushed is last in `stack`.
        let sp = dsp.get(&mut store).unwrap_i32() as u32;
        let data = memory.data(&store);
        let mut stack = Vec::new();
        let mut addr = sp;
        while addr < DATA_STACK_TOP {
            let b: [u8; 4] = data[addr as usize..addr as usize + 4].try_into().unwrap();
            stack.push(i32::from_le_bytes(b));
            addr += CELL_SIZE;
        }
        stack
    }
#[test]
fn execute_push_i32() {
assert_eq!(run_word(&[IrOp::PushI32(42)]), vec![42]);
}
#[test]
fn execute_push_multiple() {
assert_eq!(
run_word(&[IrOp::PushI32(1), IrOp::PushI32(2), IrOp::PushI32(3)]),
vec![3, 2, 1],
);
}
#[test]
fn execute_add() {
assert_eq!(
run_word(&[IrOp::PushI32(3), IrOp::PushI32(4), IrOp::Add]),
vec![7]
);
}
#[test]
fn execute_sub() {
assert_eq!(
run_word(&[IrOp::PushI32(10), IrOp::PushI32(3), IrOp::Sub]),
vec![7]
);
}
#[test]
fn execute_mul() {
assert_eq!(
run_word(&[IrOp::PushI32(6), IrOp::PushI32(7), IrOp::Mul]),
vec![42]
);
}
#[test]
fn execute_divmod() {
// ( 10 3 -- rem quot ) => ( 1 3 ) => top-first: [3, 1]
assert_eq!(
run_word(&[IrOp::PushI32(10), IrOp::PushI32(3), IrOp::DivMod]),
vec![3, 1]
);
}
#[test]
fn execute_dup() {
assert_eq!(run_word(&[IrOp::PushI32(42), IrOp::Dup]), vec![42, 42]);
}
#[test]
fn execute_drop() {
assert_eq!(
run_word(&[IrOp::PushI32(1), IrOp::PushI32(2), IrOp::Drop]),
vec![1]
);
}
#[test]
fn execute_swap() {
// ( 1 2 -- 2 1 ) => top-first: [1, 2]
assert_eq!(
run_word(&[IrOp::PushI32(1), IrOp::PushI32(2), IrOp::Swap]),
vec![1, 2]
);
}
#[test]
fn execute_over() {
// ( 1 2 -- 1 2 1 )
assert_eq!(
run_word(&[IrOp::PushI32(1), IrOp::PushI32(2), IrOp::Over]),
vec![1, 2, 1]
);
}
#[test]
fn execute_rot() {
// ( 1 2 3 -- 2 3 1 ) => top-first: [1, 3, 2]
assert_eq!(
run_word(&[
IrOp::PushI32(1),
IrOp::PushI32(2),
IrOp::PushI32(3),
IrOp::Rot
]),
vec![1, 3, 2],
);
}
#[test]
fn execute_negate() {
assert_eq!(run_word(&[IrOp::PushI32(5), IrOp::Negate]), vec![-5]);
}
#[test]
fn execute_abs() {
assert_eq!(run_word(&[IrOp::PushI32(-42), IrOp::Abs]), vec![42]);
assert_eq!(run_word(&[IrOp::PushI32(42), IrOp::Abs]), vec![42]);
}
#[test]
fn execute_eq() {
assert_eq!(
run_word(&[IrOp::PushI32(5), IrOp::PushI32(5), IrOp::Eq]),
vec![-1]
);
assert_eq!(
run_word(&[IrOp::PushI32(3), IrOp::PushI32(5), IrOp::Eq]),
vec![0]
);
}
#[test]
fn execute_lt() {
assert_eq!(
run_word(&[IrOp::PushI32(3), IrOp::PushI32(5), IrOp::Lt]),
vec![-1]
);
assert_eq!(
run_word(&[IrOp::PushI32(5), IrOp::PushI32(3), IrOp::Lt]),
vec![0]
);
}
#[test]
fn execute_gt() {
assert_eq!(
run_word(&[IrOp::PushI32(5), IrOp::PushI32(3), IrOp::Gt]),
vec![-1]
);
}
#[test]
fn execute_zero_eq() {
assert_eq!(run_word(&[IrOp::PushI32(0), IrOp::ZeroEq]), vec![-1]);
assert_eq!(run_word(&[IrOp::PushI32(5), IrOp::ZeroEq]), vec![0]);
}
#[test]
fn execute_zero_lt() {
assert_eq!(run_word(&[IrOp::PushI32(-1), IrOp::ZeroLt]), vec![-1]);
assert_eq!(run_word(&[IrOp::PushI32(0), IrOp::ZeroLt]), vec![0]);
}
#[test]
fn execute_and_or_xor() {
assert_eq!(
run_word(&[IrOp::PushI32(0xFF), IrOp::PushI32(0x0F), IrOp::And]),
vec![0x0F]
);
assert_eq!(
run_word(&[IrOp::PushI32(0xF0), IrOp::PushI32(0x0F), IrOp::Or]),
vec![0xFF]
);
assert_eq!(
run_word(&[IrOp::PushI32(0xFF), IrOp::PushI32(0xF0), IrOp::Xor]),
vec![0x0F]
);
}
#[test]
fn execute_invert() {
assert_eq!(run_word(&[IrOp::PushI32(0), IrOp::Invert]), vec![-1]);
}
#[test]
fn execute_shifts() {
assert_eq!(
run_word(&[IrOp::PushI32(1), IrOp::PushI32(4), IrOp::Lshift]),
vec![16]
);
assert_eq!(
run_word(&[IrOp::PushI32(16), IrOp::PushI32(2), IrOp::Rshift]),
vec![4]
);
}
#[test]
fn execute_fetch_store() {
let ops = vec![
IrOp::PushI32(42),
IrOp::PushI32(0x100),
IrOp::Store,
IrOp::PushI32(0x100),
IrOp::Fetch,
];
assert_eq!(run_word(&ops), vec![42]);
}
#[test]
fn execute_cfetch_cstore() {
let ops = vec![
IrOp::PushI32(65),
IrOp::PushI32(0x200),
IrOp::CStore,
IrOp::PushI32(0x200),
IrOp::CFetch,
];
assert_eq!(run_word(&ops), vec![65]);
}
#[test]
fn execute_if_then_else() {
// TRUE path
let ops = vec![
IrOp::PushI32(-1),
IrOp::If {
then_body: vec![IrOp::PushI32(42)],
else_body: Some(vec![IrOp::PushI32(0)]),
},
];
assert_eq!(run_word(&ops), vec![42]);
// FALSE path
let ops = vec![
IrOp::PushI32(0),
IrOp::If {
then_body: vec![IrOp::PushI32(42)],
else_body: Some(vec![IrOp::PushI32(0)]),
},
];
assert_eq!(run_word(&ops), vec![0]);
}
#[test]
fn execute_if_without_else() {
let ops = vec![
IrOp::PushI32(99),
IrOp::PushI32(-1),
IrOp::If {
then_body: vec![IrOp::PushI32(42)],
else_body: None,
},
];
assert_eq!(run_word(&ops), vec![42, 99]);
let ops = vec![
IrOp::PushI32(99),
IrOp::PushI32(0),
IrOp::If {
then_body: vec![IrOp::PushI32(42)],
else_body: None,
},
];
assert_eq!(run_word(&ops), vec![99]);
}
#[test]
fn execute_nested_if() {
let ops = vec![
IrOp::PushI32(-1),
IrOp::If {
then_body: vec![
IrOp::PushI32(-1),
IrOp::If {
then_body: vec![IrOp::PushI32(1)],
else_body: Some(vec![IrOp::PushI32(2)]),
},
],
else_body: Some(vec![IrOp::PushI32(3)]),
},
];
assert_eq!(run_word(&ops), vec![1]);
}
#[test]
fn execute_begin_until() {
// Count down from 3
let ops = vec![
IrOp::PushI32(3),
IrOp::BeginUntil {
body: vec![IrOp::PushI32(1), IrOp::Sub, IrOp::Dup, IrOp::ZeroEq],
},
];
assert_eq!(run_word(&ops), vec![0]);
}
#[test]
fn execute_return_stack() {
let ops = vec![IrOp::PushI32(42), IrOp::ToR, IrOp::PushI32(99), IrOp::FromR];
assert_eq!(run_word(&ops), vec![42, 99]);
}
#[test]
fn execute_rfetch() {
let ops = vec![IrOp::PushI32(42), IrOp::ToR, IrOp::RFetch, IrOp::FromR];
assert_eq!(run_word(&ops), vec![42, 42]);
}
#[test]
fn execute_nip() {
assert_eq!(
run_word(&[IrOp::PushI32(1), IrOp::PushI32(2), IrOp::Nip]),
vec![2]
);
}
#[test]
fn execute_tuck() {
// ( 1 2 -- 2 1 2 )
assert_eq!(
run_word(&[IrOp::PushI32(1), IrOp::PushI32(2), IrOp::Tuck]),
vec![2, 1, 2],
);
}
#[test]
fn execute_plus_store() {
let ops = vec![
IrOp::PushI32(10),
IrOp::PushI32(0x100),
IrOp::Store,
IrOp::PushI32(5),
IrOp::PushI32(0x100),
IrOp::PlusStore,
IrOp::PushI32(0x100),
IrOp::Fetch,
];
assert_eq!(run_word(&ops), vec![15]);
}
#[test]
fn execute_complex_expression() {
// (3 + 4) * 2 = 14
let ops = vec![
IrOp::PushI32(3),
IrOp::PushI32(4),
IrOp::Add,
IrOp::PushI32(2),
IrOp::Mul,
];
assert_eq!(run_word(&ops), vec![14]);
}
// ===================================================================
// Stack-to-local promotion tests
// ===================================================================
#[test]
fn promotable_pure_arithmetic() {
assert!(is_promotable(&[IrOp::Dup, IrOp::Mul]));
assert!(is_promotable(&[IrOp::PushI32(1), IrOp::Add]));
assert!(is_promotable(&[IrOp::Swap, IrOp::Over, IrOp::Nip]));
}
#[test]
fn not_promotable_with_calls() {
assert!(!is_promotable(&[IrOp::Call(WordId(5))]));
assert!(!is_promotable(&[IrOp::Emit]));
assert!(!is_promotable(&[IrOp::ToR]));
assert!(!is_promotable(&[IrOp::If {
then_body: vec![],
else_body: None,
}]));
assert!(!is_promotable(&[]));
}
#[test]
fn compute_stack_needs_dup_mul() {
// DUP * : reads 1 item from caller, net change = 0 (1 in, 1 out via dup*mul)
let (preload, net) = compute_stack_needs(&[IrOp::Dup, IrOp::Mul]);
assert_eq!(preload, 1);
assert_eq!(net, 0);
}
#[test]
fn compute_stack_needs_push_add() {
// PushI32(1) Add: needs 1 item from caller (Add consumes 2, push provides 1)
let (preload, net) = compute_stack_needs(&[IrOp::PushI32(1), IrOp::Add]);
assert_eq!(preload, 1); // Add reads depth-2 = -1 when depth=1 after push
assert_eq!(net, 0);
}
#[test]
fn compute_stack_needs_swap() {
// SWAP: reads 2 items, net = 0
let (preload, net) = compute_stack_needs(&[IrOp::Swap]);
assert_eq!(preload, 2);
assert_eq!(net, 0);
}
#[test]
fn promoted_dup_mul_executes() {
// SQUARE = DUP * (promotable: preload 1 item, no memory stack ops)
let ops = vec![IrOp::PushI32(7), IrOp::Dup, IrOp::Mul];
assert_eq!(run_word(&ops), vec![49]);
}
#[test]
fn promoted_swap_executes() {
// Swap two items using promoted path (zero WASM instructions for swap)
let ops = vec![IrOp::PushI32(1), IrOp::PushI32(2), IrOp::Swap];
assert_eq!(run_word(&ops), vec![1, 2]);
}
#[test]
fn promoted_over_add_executes() {
// OVER OVER + : promoted, reads 2 items, pushes 1 extra
let ops = vec![
IrOp::PushI32(3),
IrOp::PushI32(4),
IrOp::Over,
IrOp::Over,
IrOp::Add,
];
assert_eq!(run_word(&ops), vec![7, 4, 3]);
}
#[test]
fn promoted_nip_executes() {
let ops = vec![IrOp::PushI32(10), IrOp::PushI32(20), IrOp::Nip];
assert_eq!(run_word(&ops), vec![20]);
}
#[test]
fn promoted_rot_executes() {
let ops = vec![
IrOp::PushI32(1),
IrOp::PushI32(2),
IrOp::PushI32(3),
IrOp::Rot,
];
assert_eq!(run_word(&ops), vec![1, 3, 2]);
}
#[test]
fn promoted_comparison_executes() {
let ops = vec![IrOp::PushI32(5), IrOp::PushI32(5), IrOp::Eq];
assert_eq!(run_word(&ops), vec![-1]);
let ops = vec![IrOp::PushI32(3), IrOp::PushI32(5), IrOp::Lt];
assert_eq!(run_word(&ops), vec![-1]);
}
#[test]
fn promoted_memory_fetch_store_executes() {
let ops = vec![
IrOp::PushI32(42),
IrOp::PushI32(0x100),
IrOp::Store,
IrOp::PushI32(0x100),
IrOp::Fetch,
];
assert_eq!(run_word(&ops), vec![42]);
}
#[test]
fn promoted_divmod_executes() {
// ( 10 3 -- rem quot ) => top-first: [3, 1]
let ops = vec![IrOp::PushI32(10), IrOp::PushI32(3), IrOp::DivMod];
assert_eq!(run_word(&ops), vec![3, 1]);
}
#[test]
fn promoted_tuck_executes() {
// ( 1 2 -- 2 1 2 )
let ops = vec![IrOp::PushI32(1), IrOp::PushI32(2), IrOp::Tuck];
assert_eq!(run_word(&ops), vec![2, 1, 2]);
}
#[test]
fn promoted_two_dup_executes() {
let ops = vec![IrOp::PushI32(3), IrOp::PushI32(4), IrOp::TwoDup];
assert_eq!(run_word(&ops), vec![4, 3, 4, 3]);
}
#[test]
fn promoted_two_drop_executes() {
let ops = vec![
IrOp::PushI32(1),
IrOp::PushI32(2),
IrOp::PushI32(3),
IrOp::TwoDrop,
];
assert_eq!(run_word(&ops), vec![1]);
}
#[test]
fn promoted_negate_abs_invert_executes() {
assert_eq!(run_word(&[IrOp::PushI32(5), IrOp::Negate]), vec![-5]);
assert_eq!(run_word(&[IrOp::PushI32(-42), IrOp::Abs]), vec![42]);
assert_eq!(run_word(&[IrOp::PushI32(0), IrOp::Invert]), vec![-1]);
}
#[test]
fn promoted_zero_eq_zero_lt_executes() {
assert_eq!(run_word(&[IrOp::PushI32(0), IrOp::ZeroEq]), vec![-1]);
assert_eq!(run_word(&[IrOp::PushI32(5), IrOp::ZeroEq]), vec![0]);
assert_eq!(run_word(&[IrOp::PushI32(-1), IrOp::ZeroLt]), vec![-1]);
assert_eq!(run_word(&[IrOp::PushI32(0), IrOp::ZeroLt]), vec![0]);
}
#[test]
fn promoted_shift_executes() {
assert_eq!(
run_word(&[IrOp::PushI32(1), IrOp::PushI32(4), IrOp::Lshift]),
vec![16]
);
assert_eq!(
run_word(&[IrOp::PushI32(16), IrOp::PushI32(2), IrOp::Rshift]),
vec![4]
);
}
#[test]
fn promoted_plus_store_executes() {
let ops = vec![
IrOp::PushI32(10),
IrOp::PushI32(0x100),
IrOp::Store,
IrOp::PushI32(5),
IrOp::PushI32(0x100),
IrOp::PlusStore,
IrOp::PushI32(0x100),
IrOp::Fetch,
];
assert_eq!(run_word(&ops), vec![15]);
}
#[test]
fn promoted_cfetch_cstore_executes() {
let ops = vec![
IrOp::PushI32(65),
IrOp::PushI32(0x200),
IrOp::CStore,
IrOp::PushI32(0x200),
IrOp::CFetch,
];
assert_eq!(run_word(&ops), vec![65]);
}
#[test]
fn non_promotable_still_works() {
// Words with control flow should NOT be promoted, but should still work
let ops = vec![
IrOp::PushI32(-1),
IrOp::If {
then_body: vec![IrOp::PushI32(42)],
else_body: Some(vec![IrOp::PushI32(0)]),
},
];
assert!(!is_promotable(&ops));
assert_eq!(run_word(&ops), vec![42]);
}
    // ===================================================================
    // Float IR tests
    // ===================================================================
    /// Run a compiled word and return the float stack (top first).
    ///
    /// Identical wasmtime setup to `run_word` (same import order: emit,
    /// memory, dsp, rsp, fsp, table), but reads back the float stack from
    /// `fsp` instead of the data stack. Float cells are 8-byte little-endian
    /// f64 values, hence the `addr += 8` step (vs `CELL_SIZE` for i32 cells).
    fn run_float_word(ops: &[IrOp]) -> Vec<f64> {
        use wasmtime::*;
        let compiled = compile_word("test", ops, &default_config()).unwrap();
        let engine = Engine::default();
        let mut store = Store::new(&engine, ());
        let memory = Memory::new(&mut store, MemoryType::new(16, None)).unwrap();
        let dsp = Global::new(
            &mut store,
            wasmtime::GlobalType::new(ValType::I32, Mutability::Var),
            Val::I32(DATA_STACK_TOP as i32),
        )
        .unwrap();
        let rsp = Global::new(
            &mut store,
            wasmtime::GlobalType::new(ValType::I32, Mutability::Var),
            Val::I32(RETURN_STACK_TOP as i32),
        )
        .unwrap();
        let fsp = Global::new(
            &mut store,
            wasmtime::GlobalType::new(ValType::I32, Mutability::Var),
            Val::I32(FLOAT_STACK_TOP as i32),
        )
        .unwrap();
        let table = Table::new(
            &mut store,
            wasmtime::TableType::new(RefType::FUNCREF, 16, None),
            Ref::Func(None),
        )
        .unwrap();
        let emit_ty = FuncType::new(&engine, [ValType::I32], []);
        let emit = Func::new(&mut store, emit_ty, |_caller, _params, _results| Ok(()));
        let module = wasmtime::Module::new(&engine, &compiled.bytes).unwrap();
        let instance = Instance::new(
            &mut store,
            &module,
            &[
                emit.into(),
                memory.into(),
                dsp.into(),
                rsp.into(),
                fsp.into(),
                table.into(),
            ],
        )
        .unwrap();
        instance
            .get_func(&mut store, "fn")
            .unwrap()
            .call(&mut store, &[], &mut [])
            .unwrap();
        // Read float stack: walk from the final float stack pointer up to the
        // stack top in 8-byte steps (top of stack comes out first).
        let sp = fsp.get(&mut store).unwrap_i32() as u32;
        let data = memory.data(&store);
        let mut stack = Vec::new();
        let mut addr = sp;
        while addr < FLOAT_STACK_TOP {
            let b: [u8; 8] = data[addr as usize..addr as usize + 8].try_into().unwrap();
            stack.push(f64::from_le_bytes(b));
            addr += 8;
        }
        stack
    }
#[test]
fn compile_push_f64_validates() {
let m = compile_word("test", &[IrOp::PushF64(3.14)], &default_config()).unwrap();
validate_wasm(&m.bytes).unwrap();
}
#[test]
fn compile_float_arithmetic_validates() {
let ops = vec![IrOp::PushF64(1.0), IrOp::PushF64(2.0), IrOp::FAdd];
let m = compile_word("fadd", &ops, &default_config()).unwrap();
validate_wasm(&m.bytes).unwrap();
}
#[test]
fn compile_float_cross_stack_validates() {
let ops = vec![IrOp::PushI32(42), IrOp::StoF, IrOp::FtoS];
let m = compile_word("cross", &ops, &default_config()).unwrap();
validate_wasm(&m.bytes).unwrap();
}
#[test]
fn execute_push_f64() {
assert_eq!(run_float_word(&[IrOp::PushF64(3.14)]), vec![3.14]);
}
#[test]
fn execute_float_add() {
let ops = vec![IrOp::PushF64(1.0), IrOp::PushF64(2.0), IrOp::FAdd];
assert_eq!(run_float_word(&ops), vec![3.0]);
}
#[test]
fn execute_float_sub() {
let ops = vec![IrOp::PushF64(5.0), IrOp::PushF64(3.0), IrOp::FSub];
assert_eq!(run_float_word(&ops), vec![2.0]);
}
#[test]
fn execute_float_mul() {
let ops = vec![IrOp::PushF64(3.0), IrOp::PushF64(4.0), IrOp::FMul];
assert_eq!(run_float_word(&ops), vec![12.0]);
}
#[test]
fn execute_float_div() {
let ops = vec![IrOp::PushF64(10.0), IrOp::PushF64(4.0), IrOp::FDiv];
assert_eq!(run_float_word(&ops), vec![2.5]);
}
#[test]
fn execute_float_negate() {
let ops = vec![IrOp::PushF64(3.0), IrOp::FNegate];
assert_eq!(run_float_word(&ops), vec![-3.0]);
}
#[test]
fn execute_float_abs() {
let ops = vec![IrOp::PushF64(-7.0), IrOp::FAbs];
assert_eq!(run_float_word(&ops), vec![7.0]);
}
#[test]
fn execute_float_sqrt() {
let ops = vec![IrOp::PushF64(9.0), IrOp::FSqrt];
assert_eq!(run_float_word(&ops), vec![3.0]);
}
#[test]
fn execute_float_floor() {
let ops = vec![IrOp::PushF64(3.7), IrOp::FFloor];
assert_eq!(run_float_word(&ops), vec![3.0]);
}
#[test]
fn execute_float_round() {
let ops = vec![IrOp::PushF64(2.5), IrOp::FRound];
assert_eq!(run_float_word(&ops), vec![2.0]); // round ties even
}
#[test]
fn execute_float_min_max() {
let ops = vec![IrOp::PushF64(3.0), IrOp::PushF64(5.0), IrOp::FMin];
assert_eq!(run_float_word(&ops), vec![3.0]);
let ops = vec![IrOp::PushF64(3.0), IrOp::PushF64(5.0), IrOp::FMax];
assert_eq!(run_float_word(&ops), vec![5.0]);
}
#[test]
fn execute_fdup() {
let ops = vec![IrOp::PushF64(7.0), IrOp::FDup];
assert_eq!(run_float_word(&ops), vec![7.0, 7.0]);
}
#[test]
fn execute_fdrop() {
let ops = vec![IrOp::PushF64(1.0), IrOp::PushF64(2.0), IrOp::FDrop];
assert_eq!(run_float_word(&ops), vec![1.0]);
}
#[test]
fn execute_fswap() {
let ops = vec![IrOp::PushF64(1.0), IrOp::PushF64(2.0), IrOp::FSwap];
assert_eq!(run_float_word(&ops), vec![1.0, 2.0]);
}
#[test]
fn execute_fover() {
let ops = vec![IrOp::PushF64(1.0), IrOp::PushF64(2.0), IrOp::FOver];
assert_eq!(run_float_word(&ops), vec![1.0, 2.0, 1.0]);
}
#[test]
fn execute_float_zero_eq() {
let ops = vec![IrOp::PushF64(0.0), IrOp::FZeroEq];
assert_eq!(run_word(&ops), vec![-1]);
let ops = vec![IrOp::PushF64(1.0), IrOp::FZeroEq];
assert_eq!(run_word(&ops), vec![0]);
}
#[test]
fn execute_float_zero_lt() {
let ops = vec![IrOp::PushF64(-1.0), IrOp::FZeroLt];
assert_eq!(run_word(&ops), vec![-1]);
let ops = vec![IrOp::PushF64(1.0), IrOp::FZeroLt];
assert_eq!(run_word(&ops), vec![0]);
}
#[test]
fn execute_float_eq() {
let ops = vec![IrOp::PushF64(3.0), IrOp::PushF64(3.0), IrOp::FEq];
assert_eq!(run_word(&ops), vec![-1]);
let ops = vec![IrOp::PushF64(3.0), IrOp::PushF64(4.0), IrOp::FEq];
assert_eq!(run_word(&ops), vec![0]);
}
#[test]
fn execute_float_lt() {
let ops = vec![IrOp::PushF64(2.0), IrOp::PushF64(3.0), IrOp::FLt];
assert_eq!(run_word(&ops), vec![-1]);
let ops = vec![IrOp::PushF64(3.0), IrOp::PushF64(2.0), IrOp::FLt];
assert_eq!(run_word(&ops), vec![0]);
}
#[test]
fn execute_stof_ftos() {
// ( 42 -- ) ( F: -- 42.0 ) then ( F: 42.0 -- ) ( -- 42 )
let ops = vec![IrOp::PushI32(42), IrOp::StoF, IrOp::FtoS];
assert_eq!(run_word(&ops), vec![42]);
}
#[test]
fn execute_fetch_store_float() {
// Store 3.14 at address 0x100, then fetch it back
let ops = vec![
IrOp::PushF64(3.14),
IrOp::PushI32(0x100),
IrOp::StoreFloat,
IrOp::PushI32(0x100),
IrOp::FetchFloat,
];
assert_eq!(run_float_word(&ops), vec![3.14]);
}
}