From bf7581ad9e1a48fceea94042fea06f31351944a1 Mon Sep 17 00:00:00 2001 From: Oleksandr Kozachuk Date: Thu, 2 Apr 2026 13:47:28 +0200 Subject: [PATCH] Implement float IR operations: 25 words compiled to native WASM f64 Convert 25 float words from host functions to IR primitives: - Stack: FDROP FDUP FSWAP FOVER FNIP FTUCK - Arithmetic: F+ F- F* F/ FNEGATE FABS FSQRT FMIN FMAX FLOOR FROUND - Comparisons: F0= F0< F= F< - Memory: F@ F! - Conversions: S>F F>S 24 new IrOp variants compiled to native WASM f64 instructions. EmitCtx struct threads f64 scratch locals through all emit functions. Float constant folding: 1.5E0 2.5E0 F+ folds to PushF64(4.0). Float peephole: PushF64+FDrop, FDup+FDrop, FSwap+FSwap eliminated. Float literals now compile as PushF64 IR ops instead of anonymous host calls. ~420 lines of Rust closure code removed from outer.rs. All 14 optimizations now implemented. 430 tests passing. --- crates/core/src/codegen.rs | 663 ++++++++++++++++++++++++++++++++--- crates/core/src/ir.rs | 56 +++ crates/core/src/optimizer.rs | 133 +++++++ crates/core/src/outer.rs | 464 +++++------------------- 4 files changed, 893 insertions(+), 423 deletions(-) diff --git a/crates/core/src/codegen.rs b/crates/core/src/codegen.rs index b2cbde7..873da2e 100644 --- a/crates/core/src/codegen.rs +++ b/crates/core/src/codegen.rs @@ -34,7 +34,6 @@ const DSP: u32 = 0; const RSP: u32 = 1; /// Index of the `$fsp` global (float stack pointer). -#[allow(dead_code)] const FSP: u32 = 2; /// Index of the imported function table. @@ -74,6 +73,13 @@ const MEM1: MemArg = MemArg { memory_index: MEMORY_INDEX, }; +/// Natural-alignment `MemArg` for 8-byte f64 operations. +const MEM8: MemArg = MemArg { + offset: 0, + align: 3, // 2^3 = 8 + memory_index: MEMORY_INDEX, +}; + // --------------------------------------------------------------------------- // Public types // --------------------------------------------------------------------------- @@ -214,24 +220,119 @@ fn bool_to_forth_flag(f: &mut Function, tmp: u32) { .instruction(&Instruction::I32Sub); } +// --------------------------------------------------------------------------- +// Float stack helpers +// --------------------------------------------------------------------------- + +/// Carries f64 scratch local indices for float codegen. +struct EmitCtx { + f64_local_0: u32, + f64_local_1: u32, +} + +/// Decrement the FSP global by 8 (allocate space for one f64). +fn fsp_dec(f: &mut Function) { + f.instruction(&Instruction::GlobalGet(FSP)) + .instruction(&Instruction::I32Const(8)) + .instruction(&Instruction::I32Sub) + .instruction(&Instruction::GlobalSet(FSP)); +} + +/// Increment the FSP global by 8 (free space for one f64). +fn fsp_inc(f: &mut Function) { + f.instruction(&Instruction::GlobalGet(FSP)) + .instruction(&Instruction::I32Const(8)) + .instruction(&Instruction::I32Add) + .instruction(&Instruction::GlobalSet(FSP)); +} + +/// Save an f64 from the WASM operand stack into `tmp`, decrement FSP, +/// then store the f64 at [FSP]. +fn fpush_via_local(f: &mut Function, tmp: u32) { + f.instruction(&Instruction::LocalSet(tmp)); + fsp_dec(f); + f.instruction(&Instruction::GlobalGet(FSP)) + .instruction(&Instruction::LocalGet(tmp)) + .instruction(&Instruction::F64Store(MEM8)); +} + +/// Decrement FSP, then store the f64 from local `src` at [FSP]. +fn fpush_from_local(f: &mut Function, src: u32) { + fsp_dec(f); + f.instruction(&Instruction::GlobalGet(FSP)) + .instruction(&Instruction::LocalGet(src)) + .instruction(&Instruction::F64Store(MEM8)); +} + +/// Load f64 from [FSP] onto the WASM operand stack, then increment FSP. +fn fpop(f: &mut Function) { + f.instruction(&Instruction::GlobalGet(FSP)) + .instruction(&Instruction::F64Load(MEM8)); + fsp_inc(f); +} + +/// Load f64 from [FSP] onto the WASM operand stack without popping. +fn fpeek(f: &mut Function) { + f.instruction(&Instruction::GlobalGet(FSP)) + .instruction(&Instruction::F64Load(MEM8)); +} + +/// Pop two floats (b then a), apply binary op, push result. +fn emit_float_binary(f: &mut Function, ctx: &EmitCtx, wasm_op: &Instruction<'_>) { + fpop(f); + f.instruction(&Instruction::LocalSet(ctx.f64_local_0)); + fpop(f); + f.instruction(&Instruction::LocalSet(ctx.f64_local_1)); + f.instruction(&Instruction::LocalGet(ctx.f64_local_1)) + .instruction(&Instruction::LocalGet(ctx.f64_local_0)) + .instruction(wasm_op); + fpush_via_local(f, ctx.f64_local_0); +} + +/// Pop one float, apply unary op, push result. +fn emit_float_unary(f: &mut Function, ctx: &EmitCtx, wasm_op: &Instruction<'_>) { + fpop(f); + f.instruction(wasm_op); + fpush_via_local(f, ctx.f64_local_0); +} + +/// Pop two floats, compare, push Forth flag to data stack. +fn emit_float_cmp(f: &mut Function, ctx: &EmitCtx, wasm_cmp: &Instruction<'_>) { + fpop(f); + f.instruction(&Instruction::LocalSet(ctx.f64_local_0)); + fpop(f); + f.instruction(&Instruction::LocalSet(ctx.f64_local_1)); + f.instruction(&Instruction::LocalGet(ctx.f64_local_1)) + .instruction(&Instruction::LocalGet(ctx.f64_local_0)) + .instruction(wasm_cmp); + bool_to_forth_flag(f, SCRATCH_BASE); + push_via_local(f, SCRATCH_BASE + 1); +} + // --------------------------------------------------------------------------- // IR emission // --------------------------------------------------------------------------- /// Emit all IR operations in `ops` into the WASM function body `f`. -fn emit_body(f: &mut Function, ops: &[IrOp]) { +fn emit_body(f: &mut Function, ops: &[IrOp], ctx: &EmitCtx) { for op in ops { - emit_op(f, op); + emit_op(f, op, ctx); } } /// Emit a single IR operation. #[allow(clippy::too_many_lines)] -fn emit_op(f: &mut Function, op: &IrOp) { +fn emit_op(f: &mut Function, op: &IrOp, ctx: &EmitCtx) { match op { // -- Literals ------------------------------------------------------- IrOp::PushI32(n) => push_const(f, *n), - IrOp::PushI64(_) | IrOp::PushF64(_) => { /* TODO: double / float stacks */ } + IrOp::PushI64(_) => { /* TODO: double-cell */ } + IrOp::PushF64(val) => { + fsp_dec(f); + f.instruction(&Instruction::GlobalGet(FSP)) + .instruction(&Instruction::F64Const(*val)) + .instruction(&Instruction::F64Store(MEM8)); + } // -- Stack manipulation --------------------------------------------- IrOp::Drop => dsp_inc(f), @@ -460,21 +561,21 @@ fn emit_op(f: &mut Function, op: &IrOp) { } => { pop(f); f.instruction(&Instruction::If(BlockType::Empty)); - emit_body(f, then_body); + emit_body(f, then_body, ctx); if let Some(eb) = else_body { f.instruction(&Instruction::Else); - emit_body(f, eb); + emit_body(f, eb, ctx); } f.instruction(&Instruction::End); } IrOp::DoLoop { body, is_plus_loop } => { - emit_do_loop(f, body, *is_plus_loop); + emit_do_loop(f, body, *is_plus_loop, ctx); } IrOp::BeginUntil { body } => { f.instruction(&Instruction::Loop(BlockType::Empty)); - emit_body(f, body); + emit_body(f, body, ctx); pop(f); f.instruction(&Instruction::I32Eqz) .instruction(&Instruction::BrIf(0)) @@ -483,7 +584,7 @@ fn emit_op(f: &mut Function, op: &IrOp) { IrOp::BeginAgain { body } => { f.instruction(&Instruction::Loop(BlockType::Empty)); - emit_body(f, body); + emit_body(f, body, ctx); f.instruction(&Instruction::Br(0)) .instruction(&Instruction::End); } @@ -491,11 +592,11 @@ fn emit_op(f: &mut Function, op: &IrOp) { IrOp::BeginWhileRepeat { test, body } => { f.instruction(&Instruction::Block(BlockType::Empty)); f.instruction(&Instruction::Loop(BlockType::Empty)); - emit_body(f, test); + emit_body(f, test, ctx); pop(f); f.instruction(&Instruction::I32Eqz) .instruction(&Instruction::BrIf(1)); // break to outer block - emit_body(f, body); + emit_body(f, body, ctx); f.instruction(&Instruction::Br(0)) // continue loop .instruction(&Instruction::End) // end loop .instruction(&Instruction::End); // end block @@ -530,25 +631,25 @@ fn emit_op(f: &mut Function, op: &IrOp) { f.instruction(&Instruction::Block(BlockType::Empty)); // $else f.instruction(&Instruction::Block(BlockType::Empty)); // $after f.instruction(&Instruction::Loop(BlockType::Empty)); // $begin - emit_body(f, outer_test); + emit_body(f, outer_test, ctx); pop(f); f.instruction(&Instruction::I32Eqz) .instruction(&Instruction::BrIf(2)); // to $else - emit_body(f, inner_test); + emit_body(f, inner_test, ctx); pop(f); f.instruction(&Instruction::I32Eqz) .instruction(&Instruction::BrIf(1)); // to $after - emit_body(f, body); + emit_body(f, body, ctx); f.instruction(&Instruction::Br(0)); // back to $begin f.instruction(&Instruction::End); // end loop f.instruction(&Instruction::End); // end $after block - emit_body(f, after_repeat); + emit_body(f, after_repeat, ctx); if else_body.is_some() { f.instruction(&Instruction::Br(1)); // skip else, goto $end } f.instruction(&Instruction::End); // end $else block if let Some(eb) = else_body { - emit_body(f, eb); + emit_body(f, eb, ctx); } f.instruction(&Instruction::End); // end $end block } @@ -647,6 +748,90 @@ fn emit_op(f: &mut Function, op: &IrOp) { .instruction(&Instruction::I32Add) .instruction(&Instruction::LocalSet(CACHED_DSP_LOCAL)); } + + // -- Float stack ops ----------------------------------------------- + IrOp::FDrop => fsp_inc(f), + IrOp::FDup => { + fpeek(f); + fpush_via_local(f, ctx.f64_local_0); + } + IrOp::FSwap => { + fpop(f); + f.instruction(&Instruction::LocalSet(ctx.f64_local_0)); + fpop(f); + f.instruction(&Instruction::LocalSet(ctx.f64_local_1)); + fpush_from_local(f, ctx.f64_local_0); + fpush_from_local(f, ctx.f64_local_1); + } + IrOp::FOver => { + f.instruction(&Instruction::GlobalGet(FSP)) + .instruction(&Instruction::I32Const(8)) + .instruction(&Instruction::I32Add) + .instruction(&Instruction::F64Load(MEM8)); + fpush_via_local(f, ctx.f64_local_0); + } + + // -- Float arithmetic ---------------------------------------------- + IrOp::FAdd => emit_float_binary(f, ctx, &Instruction::F64Add), + IrOp::FSub => emit_float_binary(f, ctx, &Instruction::F64Sub), + IrOp::FMul => emit_float_binary(f, ctx, &Instruction::F64Mul), + IrOp::FDiv => emit_float_binary(f, ctx, &Instruction::F64Div), + IrOp::FMin => emit_float_binary(f, ctx, &Instruction::F64Min), + IrOp::FMax => emit_float_binary(f, ctx, &Instruction::F64Max), + IrOp::FNegate => emit_float_unary(f, ctx, &Instruction::F64Neg), + IrOp::FAbs => emit_float_unary(f, ctx, &Instruction::F64Abs), + IrOp::FSqrt => emit_float_unary(f, ctx, &Instruction::F64Sqrt), + IrOp::FFloor => emit_float_unary(f, ctx, &Instruction::F64Floor), + IrOp::FRound => emit_float_unary(f, ctx, &Instruction::F64Nearest), + + // -- Float comparisons (cross-stack) -------------------------------- + IrOp::FZeroEq => { + fpop(f); + f.instruction(&Instruction::F64Const(0.0)) + .instruction(&Instruction::F64Eq); + bool_to_forth_flag(f, SCRATCH_BASE); + push_via_local(f, SCRATCH_BASE + 1); + } + IrOp::FZeroLt => { + fpop(f); + f.instruction(&Instruction::F64Const(0.0)) + .instruction(&Instruction::F64Lt); + bool_to_forth_flag(f, SCRATCH_BASE); + push_via_local(f, SCRATCH_BASE + 1); + } + IrOp::FEq => emit_float_cmp(f, ctx, &Instruction::F64Eq), + IrOp::FLt => emit_float_cmp(f, ctx, &Instruction::F64Lt), + + // -- Float memory (cross-stack) ------------------------------------ + IrOp::FetchFloat => { + // ( addr -- ) ( F: -- r ) + pop(f); // addr on operand stack + f.instruction(&Instruction::F64Load(MEM8)); + fpush_via_local(f, ctx.f64_local_0); + } + IrOp::StoreFloat => { + // ( addr -- ) ( F: r -- ) + pop_to(f, SCRATCH_BASE); // addr + fpop(f); + f.instruction(&Instruction::LocalSet(ctx.f64_local_0)); + f.instruction(&Instruction::LocalGet(SCRATCH_BASE)) + .instruction(&Instruction::LocalGet(ctx.f64_local_0)) + .instruction(&Instruction::F64Store(MEM8)); + } + + // -- Float/integer conversions (cross-stack) ----------------------- + IrOp::StoF => { + // ( n -- ) ( F: -- r ) + pop(f); + f.instruction(&Instruction::F64ConvertI32S); + fpush_via_local(f, ctx.f64_local_0); + } + IrOp::FtoS => { + // ( F: r -- ) ( -- n ) + fpop(f); + f.instruction(&Instruction::I32TruncF64S); + push_via_local(f, SCRATCH_BASE); + } } } @@ -684,7 +869,7 @@ fn emit_cmp(f: &mut Function, cmp: &Instruction<'_>) { } /// Emit a DO...LOOP / DO...+LOOP construct. -fn emit_do_loop(f: &mut Function, body: &[IrOp], is_plus_loop: bool) { +fn emit_do_loop(f: &mut Function, body: &[IrOp], is_plus_loop: bool, ctx: &EmitCtx) { // DO ( limit index -- ) pop_to(f, SCRATCH_BASE); // index pop_to(f, SCRATCH_BASE + 1); // limit @@ -704,7 +889,7 @@ fn emit_do_loop(f: &mut Function, body: &[IrOp], is_plus_loop: bool) { f.instruction(&Instruction::Block(BlockType::Empty)); f.instruction(&Instruction::Loop(BlockType::Empty)); - emit_body(f, body); + emit_body(f, body, ctx); // Pop current index from return stack into scratch local rpop(f); @@ -807,6 +992,29 @@ fn is_promotable(ops: &[IrOp]) -> bool { IrOp::ToR | IrOp::FromR | IrOp::RFetch => return false, IrOp::Emit | IrOp::Dot | IrOp::Cr | IrOp::Type => return false, IrOp::PushI64(_) | IrOp::PushF64(_) => return false, + IrOp::FDup + | IrOp::FDrop + | IrOp::FSwap + | IrOp::FOver + | IrOp::FAdd + | IrOp::FSub + | IrOp::FMul + | IrOp::FDiv + | IrOp::FNegate + | IrOp::FAbs + | IrOp::FSqrt + | IrOp::FMin + | IrOp::FMax + | IrOp::FFloor + | IrOp::FRound + | IrOp::FZeroEq + | IrOp::FZeroLt + | IrOp::FEq + | IrOp::FLt + | IrOp::FetchFloat + | IrOp::StoreFloat + | IrOp::StoF + | IrOp::FtoS => return false, _ => {} } } @@ -839,6 +1047,27 @@ fn stack_delta(op: &IrOp) -> i32 { IrOp::Store | IrOp::CStore | IrOp::PlusStore => -2, IrOp::TwoDup => 2, IrOp::TwoDrop => -2, + // Float-only ops: no data stack change + IrOp::PushF64(_) + | IrOp::FDup + | IrOp::FDrop + | IrOp::FSwap + | IrOp::FOver + | IrOp::FAdd + | IrOp::FSub + | IrOp::FMul + | IrOp::FDiv + | IrOp::FNegate + | IrOp::FAbs + | IrOp::FSqrt + | IrOp::FMin + | IrOp::FMax + | IrOp::FFloor + | IrOp::FRound => 0, + // Cross-stack: push to data stack + IrOp::FZeroEq | IrOp::FZeroLt | IrOp::FEq | IrOp::FLt | IrOp::FtoS => 1, + // Cross-stack: pop from data stack + IrOp::FetchFloat | IrOp::StoreFloat | IrOp::StoF => -1, _ => 0, } } @@ -897,7 +1126,9 @@ fn compute_stack_needs(ops: &[IrOp]) -> (u32, i32) { | IrOp::Fetch | IrOp::CFetch => depth - 1, IrOp::TwoDrop => depth - 2, - // Push ops don't read existing items + // Cross-stack ops that pop from data stack + IrOp::FetchFloat | IrOp::StoreFloat | IrOp::StoF => depth - 1, + // Push ops and float-only ops don't read data stack items _ => depth, }; min_accessed = min_accessed.min(reads_from); @@ -1320,6 +1551,83 @@ fn emit_promoted_cmp(f: &mut Function, sim: &mut StackSim, cmp: &Instruction<'_> // Public API // --------------------------------------------------------------------------- +/// Check if an IR body (recursively) contains any float ops that need f64 locals. +fn needs_f64_locals(ops: &[IrOp]) -> bool { + for op in ops { + match op { + IrOp::PushF64(_) + | IrOp::FDup + | IrOp::FDrop + | IrOp::FSwap + | IrOp::FOver + | IrOp::FAdd + | IrOp::FSub + | IrOp::FMul + | IrOp::FDiv + | IrOp::FNegate + | IrOp::FAbs + | IrOp::FSqrt + | IrOp::FMin + | IrOp::FMax + | IrOp::FFloor + | IrOp::FRound + | IrOp::FZeroEq + | IrOp::FZeroLt + | IrOp::FEq + | IrOp::FLt + | IrOp::FetchFloat + | IrOp::StoreFloat + | IrOp::StoF + | IrOp::FtoS => return true, + IrOp::If { + then_body, + else_body, + } => { + if needs_f64_locals(then_body) { + return true; + } + if let Some(eb) = else_body + && needs_f64_locals(eb) + { + return true; + } + } + IrOp::DoLoop { body, .. } | IrOp::BeginUntil { body } | IrOp::BeginAgain { body } => { + if needs_f64_locals(body) { + return true; + } + } + IrOp::BeginWhileRepeat { test, body } => { + if needs_f64_locals(test) || needs_f64_locals(body) { + return true; + } + } + IrOp::BeginDoubleWhileRepeat { + outer_test, + inner_test, + body, + after_repeat, + else_body, + } => { + if needs_f64_locals(outer_test) + || needs_f64_locals(inner_test) + || needs_f64_locals(body) + || needs_f64_locals(after_repeat) + { + return true; + } + if let Some(eb) = else_body + && needs_f64_locals(eb) + { + return true; + } + } + _ => {} + } + } + false +} + /// Estimate scratch locals a function body needs (not counting cached DSP). fn count_scratch_locals(ops: &[IrOp]) -> u32 { let mut max: u32 = 4; // baseline scratch space (indices SCRATCH_BASE..SCRATCH_BASE+3) @@ -1469,7 +1777,17 @@ pub fn compile_word( } else { 1 + scratch_count }; - let mut func = Function::new(vec![(num_locals, ValType::I32)]); + let has_floats = needs_f64_locals(body); + let num_f64: u32 = if has_floats { 2 } else { 0 }; + let mut locals_decl = vec![(num_locals, ValType::I32)]; + if num_f64 > 0 { + locals_decl.push((num_f64, ValType::F64)); + } + let mut func = Function::new(locals_decl); + let ctx = EmitCtx { + f64_local_0: num_locals, + f64_local_1: num_locals + 1, + }; // Prologue: cache $dsp global into local 0 func.instruction(&Instruction::GlobalGet(DSP)) @@ -1485,7 +1803,7 @@ pub fn compile_word( } emit_promoted_epilogue(&mut func, &mut sim); } else { - emit_body(&mut func, body); + emit_body(&mut func, body, &ctx); } // Epilogue: write cached DSP back to the $dsp global @@ -1517,9 +1835,14 @@ pub fn compile_word( /// Emit all IR operations, replacing `Call`/`TailCall` with direct calls /// when the target word is within the consolidated module. -fn emit_consolidated_body(f: &mut Function, ops: &[IrOp], local_fn_map: &HashMap) { +fn emit_consolidated_body( + f: &mut Function, + ops: &[IrOp], + local_fn_map: &HashMap, + ctx: &EmitCtx, +) { for op in ops { - emit_consolidated_op(f, op, local_fn_map); + emit_consolidated_op(f, op, local_fn_map, ctx); } } @@ -1528,7 +1851,12 @@ fn emit_consolidated_body(f: &mut Function, ops: &[IrOp], local_fn_map: &HashMap /// For `Call` and `TailCall`, emits a direct `call` if the target is in the /// consolidated module, otherwise falls back to `call_indirect`. For control /// flow with nested bodies, recurses to handle inner calls. -fn emit_consolidated_op(f: &mut Function, op: &IrOp, local_fn_map: &HashMap) { +fn emit_consolidated_op( + f: &mut Function, + op: &IrOp, + local_fn_map: &HashMap, + ctx: &EmitCtx, +) { match op { IrOp::Call(word_id) => { if let Some(&fn_idx) = local_fn_map.get(word_id) { @@ -1570,21 +1898,21 @@ fn emit_consolidated_op(f: &mut Function, op: &IrOp, local_fn_map: &HashMap { pop(f); f.instruction(&Instruction::If(BlockType::Empty)); - emit_consolidated_body(f, then_body, local_fn_map); + emit_consolidated_body(f, then_body, local_fn_map, ctx); if let Some(eb) = else_body { f.instruction(&Instruction::Else); - emit_consolidated_body(f, eb, local_fn_map); + emit_consolidated_body(f, eb, local_fn_map, ctx); } f.instruction(&Instruction::End); } IrOp::DoLoop { body, is_plus_loop } => { - emit_consolidated_do_loop(f, body, *is_plus_loop, local_fn_map); + emit_consolidated_do_loop(f, body, *is_plus_loop, local_fn_map, ctx); } IrOp::BeginUntil { body } => { f.instruction(&Instruction::Loop(BlockType::Empty)); - emit_consolidated_body(f, body, local_fn_map); + emit_consolidated_body(f, body, local_fn_map, ctx); pop(f); f.instruction(&Instruction::I32Eqz) .instruction(&Instruction::BrIf(0)) @@ -1593,7 +1921,7 @@ fn emit_consolidated_op(f: &mut Function, op: &IrOp, local_fn_map: &HashMap { f.instruction(&Instruction::Loop(BlockType::Empty)); - emit_consolidated_body(f, body, local_fn_map); + emit_consolidated_body(f, body, local_fn_map, ctx); f.instruction(&Instruction::Br(0)) .instruction(&Instruction::End); } @@ -1601,11 +1929,11 @@ fn emit_consolidated_op(f: &mut Function, op: &IrOp, local_fn_map: &HashMap { f.instruction(&Instruction::Block(BlockType::Empty)); f.instruction(&Instruction::Loop(BlockType::Empty)); - emit_consolidated_body(f, test, local_fn_map); + emit_consolidated_body(f, test, local_fn_map, ctx); pop(f); f.instruction(&Instruction::I32Eqz) .instruction(&Instruction::BrIf(1)); - emit_consolidated_body(f, body, local_fn_map); + emit_consolidated_body(f, body, local_fn_map, ctx); f.instruction(&Instruction::Br(0)) .instruction(&Instruction::End) .instruction(&Instruction::End); @@ -1622,31 +1950,31 @@ fn emit_consolidated_op(f: &mut Function, op: &IrOp, local_fn_map: &HashMap emit_op(f, other), + other => emit_op(f, other, ctx), } } @@ -1656,6 +1984,7 @@ fn emit_consolidated_do_loop( body: &[IrOp], is_plus_loop: bool, local_fn_map: &HashMap, + ctx: &EmitCtx, ) { // DO ( limit index -- ) pop_to(f, SCRATCH_BASE); // index @@ -1670,7 +1999,7 @@ fn emit_consolidated_do_loop( f.instruction(&Instruction::Block(BlockType::Empty)); f.instruction(&Instruction::Loop(BlockType::Empty)); - emit_consolidated_body(f, body, local_fn_map); + emit_consolidated_body(f, body, local_fn_map, ctx); // Pop current index from return stack into scratch local rpop(f); @@ -1849,14 +2178,24 @@ pub fn compile_consolidated_module( let mut code = CodeSection::new(); for (_word_id, body) in words { let num_locals = 1 + count_scratch_locals(body); - let mut func = Function::new(vec![(num_locals, ValType::I32)]); + let has_floats = needs_f64_locals(body); + let num_f64: u32 = if has_floats { 2 } else { 0 }; + let mut locals_decl = vec![(num_locals, ValType::I32)]; + if num_f64 > 0 { + locals_decl.push((num_f64, ValType::F64)); + } + let mut func = Function::new(locals_decl); + let ctx = EmitCtx { + f64_local_0: num_locals, + f64_local_1: num_locals + 1, + }; // Prologue: cache $dsp global into local 0 func.instruction(&Instruction::GlobalGet(DSP)) .instruction(&Instruction::LocalSet(CACHED_DSP_LOCAL)); // Body with consolidated call support - emit_consolidated_body(&mut func, body, local_fn_map); + emit_consolidated_body(&mut func, body, local_fn_map, &ctx); // Epilogue: write cached DSP back to the $dsp global func.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL)) @@ -2746,4 +3085,246 @@ mod tests { assert!(!is_promotable(&ops)); assert_eq!(run_word(&ops), vec![42]); } + + // =================================================================== + // Float IR tests + // =================================================================== + + /// Run a compiled word and return the float stack (top first). + fn run_float_word(ops: &[IrOp]) -> Vec { + use wasmtime::*; + + let compiled = compile_word("test", ops, &default_config()).unwrap(); + let engine = Engine::default(); + let mut store = Store::new(&engine, ()); + + let memory = Memory::new(&mut store, MemoryType::new(16, None)).unwrap(); + + let dsp = Global::new( + &mut store, + wasmtime::GlobalType::new(ValType::I32, Mutability::Var), + Val::I32(DATA_STACK_TOP as i32), + ) + .unwrap(); + + let rsp = Global::new( + &mut store, + wasmtime::GlobalType::new(ValType::I32, Mutability::Var), + Val::I32(RETURN_STACK_TOP as i32), + ) + .unwrap(); + + let fsp = Global::new( + &mut store, + wasmtime::GlobalType::new(ValType::I32, Mutability::Var), + Val::I32(FLOAT_STACK_TOP as i32), + ) + .unwrap(); + + let table = Table::new( + &mut store, + wasmtime::TableType::new(RefType::FUNCREF, 16, None), + Ref::Func(None), + ) + .unwrap(); + + let emit_ty = FuncType::new(&engine, [ValType::I32], []); + let emit = Func::new(&mut store, emit_ty, |_caller, _params, _results| Ok(())); + + let module = wasmtime::Module::new(&engine, &compiled.bytes).unwrap(); + let instance = Instance::new( + &mut store, + &module, + &[ + emit.into(), + memory.into(), + dsp.into(), + rsp.into(), + fsp.into(), + table.into(), + ], + ) + .unwrap(); + + instance + .get_func(&mut store, "fn") + .unwrap() + .call(&mut store, &[], &mut []) + .unwrap(); + + // Read float stack + let sp = fsp.get(&mut store).unwrap_i32() as u32; + let data = memory.data(&store); + let mut stack = Vec::new(); + let mut addr = sp; + while addr < FLOAT_STACK_TOP { + let b: [u8; 8] = data[addr as usize..addr as usize + 8].try_into().unwrap(); + stack.push(f64::from_le_bytes(b)); + addr += 8; + } + stack + } + + #[test] + fn compile_push_f64_validates() { + let m = compile_word("test", &[IrOp::PushF64(3.14)], &default_config()).unwrap(); + validate_wasm(&m.bytes).unwrap(); + } + + #[test] + fn compile_float_arithmetic_validates() { + let ops = vec![IrOp::PushF64(1.0), IrOp::PushF64(2.0), IrOp::FAdd]; + let m = compile_word("fadd", &ops, &default_config()).unwrap(); + validate_wasm(&m.bytes).unwrap(); + } + + #[test] + fn compile_float_cross_stack_validates() { + let ops = vec![IrOp::PushI32(42), IrOp::StoF, IrOp::FtoS]; + let m = compile_word("cross", &ops, &default_config()).unwrap(); + validate_wasm(&m.bytes).unwrap(); + } + + #[test] + fn execute_push_f64() { + assert_eq!(run_float_word(&[IrOp::PushF64(3.14)]), vec![3.14]); + } + + #[test] + fn execute_float_add() { + let ops = vec![IrOp::PushF64(1.0), IrOp::PushF64(2.0), IrOp::FAdd]; + assert_eq!(run_float_word(&ops), vec![3.0]); + } + + #[test] + fn execute_float_sub() { + let ops = vec![IrOp::PushF64(5.0), IrOp::PushF64(3.0), IrOp::FSub]; + assert_eq!(run_float_word(&ops), vec![2.0]); + } + + #[test] + fn execute_float_mul() { + let ops = vec![IrOp::PushF64(3.0), IrOp::PushF64(4.0), IrOp::FMul]; + assert_eq!(run_float_word(&ops), vec![12.0]); + } + + #[test] + fn execute_float_div() { + let ops = vec![IrOp::PushF64(10.0), IrOp::PushF64(4.0), IrOp::FDiv]; + assert_eq!(run_float_word(&ops), vec![2.5]); + } + + #[test] + fn execute_float_negate() { + let ops = vec![IrOp::PushF64(3.0), IrOp::FNegate]; + assert_eq!(run_float_word(&ops), vec![-3.0]); + } + + #[test] + fn execute_float_abs() { + let ops = vec![IrOp::PushF64(-7.0), IrOp::FAbs]; + assert_eq!(run_float_word(&ops), vec![7.0]); + } + + #[test] + fn execute_float_sqrt() { + let ops = vec![IrOp::PushF64(9.0), IrOp::FSqrt]; + assert_eq!(run_float_word(&ops), vec![3.0]); + } + + #[test] + fn execute_float_floor() { + let ops = vec![IrOp::PushF64(3.7), IrOp::FFloor]; + assert_eq!(run_float_word(&ops), vec![3.0]); + } + + #[test] + fn execute_float_round() { + let ops = vec![IrOp::PushF64(2.5), IrOp::FRound]; + assert_eq!(run_float_word(&ops), vec![2.0]); // round ties even + } + + #[test] + fn execute_float_min_max() { + let ops = vec![IrOp::PushF64(3.0), IrOp::PushF64(5.0), IrOp::FMin]; + assert_eq!(run_float_word(&ops), vec![3.0]); + let ops = vec![IrOp::PushF64(3.0), IrOp::PushF64(5.0), IrOp::FMax]; + assert_eq!(run_float_word(&ops), vec![5.0]); + } + + #[test] + fn execute_fdup() { + let ops = vec![IrOp::PushF64(7.0), IrOp::FDup]; + assert_eq!(run_float_word(&ops), vec![7.0, 7.0]); + } + + #[test] + fn execute_fdrop() { + let ops = vec![IrOp::PushF64(1.0), IrOp::PushF64(2.0), IrOp::FDrop]; + assert_eq!(run_float_word(&ops), vec![1.0]); + } + + #[test] + fn execute_fswap() { + let ops = vec![IrOp::PushF64(1.0), IrOp::PushF64(2.0), IrOp::FSwap]; + assert_eq!(run_float_word(&ops), vec![1.0, 2.0]); + } + + #[test] + fn execute_fover() { + let ops = vec![IrOp::PushF64(1.0), IrOp::PushF64(2.0), IrOp::FOver]; + assert_eq!(run_float_word(&ops), vec![1.0, 2.0, 1.0]); + } + + #[test] + fn execute_float_zero_eq() { + let ops = vec![IrOp::PushF64(0.0), IrOp::FZeroEq]; + assert_eq!(run_word(&ops), vec![-1]); + let ops = vec![IrOp::PushF64(1.0), IrOp::FZeroEq]; + assert_eq!(run_word(&ops), vec![0]); + } + + #[test] + fn execute_float_zero_lt() { + let ops = vec![IrOp::PushF64(-1.0), IrOp::FZeroLt]; + assert_eq!(run_word(&ops), vec![-1]); + let ops = vec![IrOp::PushF64(1.0), IrOp::FZeroLt]; + assert_eq!(run_word(&ops), vec![0]); + } + + #[test] + fn execute_float_eq() { + let ops = vec![IrOp::PushF64(3.0), IrOp::PushF64(3.0), IrOp::FEq]; + assert_eq!(run_word(&ops), vec![-1]); + let ops = vec![IrOp::PushF64(3.0), IrOp::PushF64(4.0), IrOp::FEq]; + assert_eq!(run_word(&ops), vec![0]); + } + + #[test] + fn execute_float_lt() { + let ops = vec![IrOp::PushF64(2.0), IrOp::PushF64(3.0), IrOp::FLt]; + assert_eq!(run_word(&ops), vec![-1]); + let ops = vec![IrOp::PushF64(3.0), IrOp::PushF64(2.0), IrOp::FLt]; + assert_eq!(run_word(&ops), vec![0]); + } + + #[test] + fn execute_stof_ftos() { + // ( 42 -- ) ( F: -- 42.0 ) then ( F: 42.0 -- ) ( -- 42 ) + let ops = vec![IrOp::PushI32(42), IrOp::StoF, IrOp::FtoS]; + assert_eq!(run_word(&ops), vec![42]); + } + + #[test] + fn execute_fetch_store_float() { + // Store 3.14 at address 0x100, then fetch it back + let ops = vec![ + IrOp::PushF64(3.14), + IrOp::PushI32(0x100), + IrOp::StoreFloat, + IrOp::PushI32(0x100), + IrOp::FetchFloat, + ]; + assert_eq!(run_float_word(&ops), vec![3.14]); + } } diff --git a/crates/core/src/ir.rs b/crates/core/src/ir.rs index 0e2d5f3..a340740 100644 --- a/crates/core/src/ir.rs +++ b/crates/core/src/ir.rs @@ -133,6 +133,62 @@ pub enum IrOp { // -- System -- /// Execute word by function table index: ( xt -- ) Execute, + + // -- Float stack manipulation -- + /// Float duplicate: ( F: r -- r r ) + FDup, + /// Float drop: ( F: r -- ) + FDrop, + /// Float swap: ( F: r1 r2 -- r2 r1 ) + FSwap, + /// Float over: ( F: r1 r2 -- r1 r2 r1 ) + FOver, + + // -- Float arithmetic -- + /// Float add: ( F: r1 r2 -- r1+r2 ) + FAdd, + /// Float subtract: ( F: r1 r2 -- r1-r2 ) + FSub, + /// Float multiply: ( F: r1 r2 -- r1*r2 ) + FMul, + /// Float divide: ( F: r1 r2 -- r1/r2 ) + FDiv, + /// Float negate: ( F: r -- -r ) + FNegate, + /// Float absolute value: ( F: r -- |r| ) + FAbs, + /// Float square root: ( F: r -- sqrt(r) ) + FSqrt, + /// Float minimum: ( F: r1 r2 -- min(r1,r2) ) + FMin, + /// Float maximum: ( F: r1 r2 -- max(r1,r2) ) + FMax, + /// Float floor: ( F: r -- floor(r) ) + FFloor, + /// Float round to nearest even: ( F: r -- round(r) ) + FRound, + + // -- Float comparisons (cross-stack: pop float, push data) -- + /// Float zero equal: ( F: r -- ) ( -- flag ) + FZeroEq, + /// Float zero less-than: ( F: r -- ) ( -- flag ) + FZeroLt, + /// Float equal: ( F: r1 r2 -- ) ( -- flag ) + FEq, + /// Float less-than: ( F: r1 r2 -- ) ( -- flag ) + FLt, + + // -- Float memory (cross-stack) -- + /// Float fetch: ( addr -- ) ( F: -- r ) + FetchFloat, + /// Float store: ( addr -- ) ( F: r -- ) + StoreFloat, + + // -- Float/integer conversions (cross-stack) -- + /// Single to float: ( n -- ) ( F: -- r ) + StoF, + /// Float to single: ( F: r -- ) ( -- n ) + FtoS, } /// A compiled word definition as IR. diff --git a/crates/core/src/optimizer.rs b/crates/core/src/optimizer.rs index 19c8586..c7dc8c0 100644 --- a/crates/core/src/optimizer.rs +++ b/crates/core/src/optimizer.rs @@ -194,6 +194,26 @@ fn peephole_one_pass(ops: Vec) -> Vec { out.pop(); continue; } + // PushF64, FDrop => remove both + (IrOp::PushF64(_), IrOp::FDrop) => { + out.pop(); + continue; + } + // FDup, FDrop => remove both + (IrOp::FDup, IrOp::FDrop) => { + out.pop(); + continue; + } + // FSwap, FSwap => remove both + (IrOp::FSwap, IrOp::FSwap) => { + out.pop(); + continue; + } + // FNegate, FNegate => remove both + (IrOp::FNegate, IrOp::FNegate) => { + out.pop(); + continue; + } // Over, Over => TwoDup (IrOp::Over, IrOp::Over) => { out.pop(); @@ -236,6 +256,17 @@ fn constant_fold(ops: Vec) -> Vec { continue; } + // Try float binary fold: last two outputs are PushF64 + if out.len() >= 2 + && let Some(result) = + try_float_binary_fold(&out[out.len() - 2], &out[out.len() - 1], &op) + { + out.pop(); + out.pop(); + out.push(IrOp::PushF64(result)); + continue; + } + // Try unary fold: last output is PushI32, current op is foldable if !out.is_empty() && let Some(result) = try_unary_fold(&out[out.len() - 1], &op) @@ -245,6 +276,15 @@ fn constant_fold(ops: Vec) -> Vec { continue; } + // Try float unary fold: last output is PushF64 + if !out.is_empty() + && let Some(result) = try_float_unary_fold(&out[out.len() - 1], &op) + { + out.pop(); + out.push(IrOp::PushF64(result)); + continue; + } + out.push(op); } out @@ -317,6 +357,53 @@ fn try_unary_fold(n_op: &IrOp, op: &IrOp) -> Option { } } +/// Try to fold a binary float operation on two constants. +fn try_float_binary_fold(a_op: &IrOp, b_op: &IrOp, op: &IrOp) -> Option { + let (a, b) = match (a_op, b_op) { + (IrOp::PushF64(a), IrOp::PushF64(b)) => (*a, *b), + _ => return None, + }; + + match op { + IrOp::FAdd => Some(a + b), + IrOp::FSub => Some(a - b), + IrOp::FMul => Some(a * b), + IrOp::FDiv => { + if b != 0.0 { + Some(a / b) + } else { + None + } + } + IrOp::FMin => Some(a.min(b)), + IrOp::FMax => Some(a.max(b)), + _ => None, + } +} + +/// Try to fold a unary float operation on a constant. +fn try_float_unary_fold(n_op: &IrOp, op: &IrOp) -> Option { + let n = match n_op { + IrOp::PushF64(n) => *n, + _ => return None, + }; + + match op { + IrOp::FNegate => Some(-n), + IrOp::FAbs => Some(n.abs()), + IrOp::FSqrt => { + if n >= 0.0 { + Some(n.sqrt()) + } else { + None + } + } + IrOp::FFloor => Some(n.floor()), + IrOp::FRound => Some(n.round_ties_even()), + _ => None, + } +} + // --------------------------------------------------------------------------- // Pass 3: Strength reduction // --------------------------------------------------------------------------- @@ -779,6 +866,52 @@ mod tests { )); } + // Float peephole tests + #[test] + fn float_push_fdrop_removed() { + assert_eq!(opt(vec![IrOp::PushF64(1.0), IrOp::FDrop]), vec![]); + } + + #[test] + fn float_fdup_fdrop_removed() { + assert_eq!(opt(vec![IrOp::FDup, IrOp::FDrop]), vec![]); + } + + #[test] + fn float_fswap_fswap_removed() { + assert_eq!(opt(vec![IrOp::FSwap, IrOp::FSwap]), vec![]); + } + + #[test] + fn float_fnegate_fnegate_removed() { + assert_eq!(opt(vec![IrOp::FNegate, IrOp::FNegate]), vec![]); + } + + // Float constant folding tests + #[test] + fn float_constant_fold_add() { + assert_eq!( + opt(vec![IrOp::PushF64(1.5), IrOp::PushF64(2.5), IrOp::FAdd]), + vec![IrOp::PushF64(4.0)] + ); + } + + #[test] + fn float_constant_fold_negate() { + assert_eq!( + opt(vec![IrOp::PushF64(3.0), IrOp::FNegate]), + vec![IrOp::PushF64(-3.0)] + ); + } + + #[test] + fn float_constant_fold_sqrt() { + assert_eq!( + opt(vec![IrOp::PushF64(9.0), IrOp::FSqrt]), + vec![IrOp::PushF64(3.0)] + ); + } + #[test] fn no_inline_large() { let mut bodies = HashMap::new(); diff --git a/crates/core/src/outer.rs b/crates/core/src/outer.rs index 6265cd2..b2da080 100644 --- a/crates/core/src/outer.rs +++ b/crates/core/src/outer.rs @@ -7148,95 +7148,10 @@ impl ForthVM { /// Float stack manipulation words. fn register_float_stack_ops(&mut self) -> anyhow::Result<()> { - // FDROP ( F: r -- ) - { - let fsp = self.fsp; - let func = Func::new( - &mut self.store, - FuncType::new(&self.engine, [], []), - move |mut caller, _, _| { - let sp = fsp.get(&mut caller).unwrap_i32() as u32; - if sp >= FLOAT_STACK_TOP { - return Err(wasmtime::Error::msg("float stack underflow")); - } - fsp.set(&mut caller, Val::I32((sp + 8) as i32)).unwrap(); - Ok(()) - }, - ); - self.register_host_primitive("FDROP", false, func)?; - } - - // FDUP ( F: r -- r r ) - { - let memory = self.memory; - let fsp = self.fsp; - let func = Func::new( - &mut self.store, - FuncType::new(&self.engine, [], []), - move |mut caller, _, _| { - let sp = fsp.get(&mut caller).unwrap_i32() as u32; - if sp >= FLOAT_STACK_TOP { - return Err(wasmtime::Error::msg("float stack underflow")); - } - let new_sp = sp - 8; - if new_sp < FLOAT_STACK_BASE { - return Err(wasmtime::Error::msg("float stack overflow")); - } - let mem = memory.data(&caller); - let bytes: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); - fsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap(); - let mem = memory.data_mut(&mut caller); - mem[new_sp as usize..new_sp as usize + 8].copy_from_slice(&bytes); - Ok(()) - }, - ); - self.register_host_primitive("FDUP", false, func)?; - } - - // FSWAP ( F: r1 r2 -- r2 r1 ) - { - let memory = self.memory; - let fsp = self.fsp; - let func = Func::new( - &mut self.store, - FuncType::new(&self.engine, [], []), - move |mut caller, _, _| { - let sp = fsp.get(&mut caller).unwrap_i32() as u32; - let mem = memory.data(&caller); - let b: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); - let a: [u8; 8] = mem[sp as usize + 8..sp as usize + 16].try_into().unwrap(); - let mem = memory.data_mut(&mut caller); - mem[sp as usize..sp as usize + 8].copy_from_slice(&a); - mem[sp as usize + 8..sp as usize + 16].copy_from_slice(&b); - Ok(()) - }, - ); - self.register_host_primitive("FSWAP", false, func)?; - } - - // FOVER ( F: r1 r2 -- r1 r2 r1 ) - { - let memory = self.memory; - let fsp = self.fsp; - let func = Func::new( - &mut self.store, - FuncType::new(&self.engine, [], []), - move |mut caller, _, _| { - let sp = fsp.get(&mut caller).unwrap_i32() as u32; - let mem = memory.data(&caller); - let a: [u8; 8] = mem[sp as usize + 8..sp as usize + 16].try_into().unwrap(); - let new_sp = sp - 8; - if new_sp < FLOAT_STACK_BASE { - return Err(wasmtime::Error::msg("float stack overflow")); - } - fsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap(); - let mem = memory.data_mut(&mut caller); - mem[new_sp as usize..new_sp as usize + 8].copy_from_slice(&a); - Ok(()) - }, - ); - self.register_host_primitive("FOVER", false, func)?; - } + self.register_primitive("FDROP", false, vec![IrOp::FDrop])?; + self.register_primitive("FDUP", false, vec![IrOp::FDup])?; + self.register_primitive("FSWAP", false, vec![IrOp::FSwap])?; + self.register_primitive("FOVER", false, vec![IrOp::FOver])?; // FROT ( F: r1 r2 r3 -- r2 r3 r1 ) { @@ -7288,166 +7203,35 @@ impl ForthVM { self.register_host_primitive("FDEPTH", false, func)?; } - // FNIP ( F: r1 r2 -- r2 ) - { - let memory = self.memory; - let fsp = self.fsp; - let func = Func::new( - &mut self.store, - FuncType::new(&self.engine, [], []), - move |mut caller, _, _| { - let sp = fsp.get(&mut caller).unwrap_i32() as u32; - let mem = memory.data(&caller); - let top: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); - let new_sp = sp + 8; - fsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap(); - let mem = memory.data_mut(&mut caller); - mem[new_sp as usize..new_sp as usize + 8].copy_from_slice(&top); - Ok(()) - }, - ); - self.register_host_primitive("FNIP", false, func)?; - } - - // FTUCK ( F: r1 r2 -- r2 r1 r2 ) - { - let memory = self.memory; - let fsp = self.fsp; - let func = Func::new( - &mut self.store, - FuncType::new(&self.engine, [], []), - move |mut caller, _, _| { - let sp = fsp.get(&mut caller).unwrap_i32() as u32; - let mem = memory.data(&caller); - let r2: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); - let r1: [u8; 8] = mem[sp as usize + 8..sp as usize + 16].try_into().unwrap(); - let new_sp = sp - 8; - if new_sp < FLOAT_STACK_BASE { - return Err(wasmtime::Error::msg("float stack overflow")); - } - fsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap(); - let mem = memory.data_mut(&mut caller); - // r2 r1 r2 (bottom to top) - mem[new_sp as usize..new_sp as usize + 8].copy_from_slice(&r2); - mem[new_sp as usize + 8..new_sp as usize + 16].copy_from_slice(&r1); - mem[new_sp as usize + 16..new_sp as usize + 24].copy_from_slice(&r2); - Ok(()) - }, - ); - self.register_host_primitive("FTUCK", false, func)?; - } + self.register_primitive("FNIP", false, vec![IrOp::FSwap, IrOp::FDrop])?; + self.register_primitive("FTUCK", false, vec![IrOp::FSwap, IrOp::FOver])?; Ok(()) } /// Float arithmetic words. fn register_float_arithmetic(&mut self) -> anyhow::Result<()> { - self.register_float_binary("F+", |a, b| a + b)?; - self.register_float_binary("F-", |a, b| a - b)?; - self.register_float_binary("F*", |a, b| a * b)?; - self.register_float_binary("F/", |a, b| a / b)?; - self.register_float_unary("FNEGATE", |a| -a)?; - self.register_float_unary("FABS", f64::abs)?; - self.register_float_binary("FMAX", f64::max)?; - self.register_float_binary("FMIN", f64::min)?; - self.register_float_unary("FSQRT", f64::sqrt)?; - self.register_float_unary("FLOOR", f64::floor)?; - self.register_float_unary("FROUND", f64::round_ties_even)?; + self.register_primitive("F+", false, vec![IrOp::FAdd])?; + self.register_primitive("F-", false, vec![IrOp::FSub])?; + self.register_primitive("F*", false, vec![IrOp::FMul])?; + self.register_primitive("F/", false, vec![IrOp::FDiv])?; + self.register_primitive("FNEGATE", false, vec![IrOp::FNegate])?; + self.register_primitive("FABS", false, vec![IrOp::FAbs])?; + self.register_primitive("FMAX", false, vec![IrOp::FMax])?; + self.register_primitive("FMIN", false, vec![IrOp::FMin])?; + self.register_primitive("FSQRT", false, vec![IrOp::FSqrt])?; + self.register_primitive("FLOOR", false, vec![IrOp::FFloor])?; + self.register_primitive("FROUND", false, vec![IrOp::FRound])?; self.register_float_binary("F**", f64::powf)?; Ok(()) } /// Float comparison words. Results go on the DATA stack. fn register_float_comparisons(&mut self) -> anyhow::Result<()> { - // F0= ( -- flag ) ( F: r -- ) - { - let memory = self.memory; - let dsp = self.dsp; - let fsp = self.fsp; - let func = Func::new( - &mut self.store, - FuncType::new(&self.engine, [], []), - move |mut caller, _, _| { - let sp = fsp.get(&mut caller).unwrap_i32() as u32; - let mem = memory.data(&caller); - let bytes: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); - let val = f64::from_le_bytes(bytes); - fsp.set(&mut caller, Val::I32((sp + 8) as i32)).unwrap(); - let flag: i32 = if val == 0.0 { -1 } else { 0 }; - let dsp_val = dsp.get(&mut caller).unwrap_i32() as u32; - let new_dsp = dsp_val - CELL_SIZE; - dsp.set(&mut caller, Val::I32(new_dsp as i32)).unwrap(); - let mem = memory.data_mut(&mut caller); - mem[new_dsp as usize..new_dsp as usize + 4] - .copy_from_slice(&flag.to_le_bytes()); - Ok(()) - }, - ); - self.register_host_primitive("F0=", false, func)?; - } - - // F0< ( -- flag ) ( F: r -- ) - { - let memory = self.memory; - let dsp = self.dsp; - let fsp = self.fsp; - let func = Func::new( - &mut self.store, - FuncType::new(&self.engine, [], []), - move |mut caller, _, _| { - let sp = fsp.get(&mut caller).unwrap_i32() as u32; - let mem = memory.data(&caller); - let bytes: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); - let val = f64::from_le_bytes(bytes); - fsp.set(&mut caller, Val::I32((sp + 8) as i32)).unwrap(); - let flag: i32 = if val < 0.0 { -1 } else { 0 }; - let dsp_val = dsp.get(&mut caller).unwrap_i32() as u32; - let new_dsp = dsp_val - CELL_SIZE; - dsp.set(&mut caller, Val::I32(new_dsp as i32)).unwrap(); - let mem = memory.data_mut(&mut caller); - mem[new_dsp as usize..new_dsp as usize + 4] - .copy_from_slice(&flag.to_le_bytes()); - Ok(()) - }, - ); - self.register_host_primitive("F0<", false, func)?; - } - - // Helper for binary float comparisons that pop two floats and push a flag - let register_float_cmp = - |vm: &mut Self, name: &str, cmp: fn(f64, f64) -> bool| -> anyhow::Result<()> { - let memory = vm.memory; - let dsp = vm.dsp; - let fsp = vm.fsp; - let func = Func::new( - &mut vm.store, - FuncType::new(&vm.engine, [], []), - move |mut caller, _, _| { - let sp = fsp.get(&mut caller).unwrap_i32() as u32; - let mem = memory.data(&caller); - let b_bytes: [u8; 8] = - mem[sp as usize..sp as usize + 8].try_into().unwrap(); - let a_bytes: [u8; 8] = - mem[sp as usize + 8..sp as usize + 16].try_into().unwrap(); - let b = f64::from_le_bytes(b_bytes); - let a = f64::from_le_bytes(a_bytes); - fsp.set(&mut caller, Val::I32((sp + 16) as i32)).unwrap(); - let flag: i32 = if cmp(a, b) { -1 } else { 0 }; - let dsp_val = dsp.get(&mut caller).unwrap_i32() as u32; - let new_dsp = dsp_val - CELL_SIZE; - dsp.set(&mut caller, Val::I32(new_dsp as i32)).unwrap(); - let mem = memory.data_mut(&mut caller); - mem[new_dsp as usize..new_dsp as usize + 4] - .copy_from_slice(&flag.to_le_bytes()); - Ok(()) - }, - ); - vm.register_host_primitive(name, false, func)?; - Ok(()) - }; - - register_float_cmp(self, "F=", |a, b| a == b)?; - register_float_cmp(self, "F<", |a, b| a < b)?; + self.register_primitive("F0=", false, vec![IrOp::FZeroEq])?; + self.register_primitive("F0<", false, vec![IrOp::FZeroLt])?; + self.register_primitive("F=", false, vec![IrOp::FEq])?; + self.register_primitive("F<", false, vec![IrOp::FLt])?; // F~ ( -- flag ) ( F: r1 r2 r3 -- ) approximate float comparison // If r3 > 0: true if |r1-r2| < r3 @@ -7502,76 +7286,8 @@ impl ForthVM { /// Float memory words. fn register_float_memory(&mut self) -> anyhow::Result<()> { - // F@ ( f-addr -- ) ( F: -- r ) fetch a float from memory - { - let memory = self.memory; - let dsp = self.dsp; - let fsp = self.fsp; - let func = Func::new( - &mut self.store, - FuncType::new(&self.engine, [], []), - move |mut caller, _, _| { - // Read all we need from memory first - let sp = dsp.get(&mut caller).unwrap_i32() as u32; - let fsp_val = fsp.get(&mut caller).unwrap_i32() as u32; - let (addr, val) = { - let mem = memory.data(&caller); - let addr_bytes: [u8; 4] = - mem[sp as usize..sp as usize + 4].try_into().unwrap(); - let addr = u32::from_le_bytes(addr_bytes) as usize; - let float_bytes: [u8; 8] = mem[addr..addr + 8].try_into().unwrap(); - (addr, f64::from_le_bytes(float_bytes)) - }; - let _ = addr; - // Update stack pointers - dsp.set(&mut caller, Val::I32((sp + CELL_SIZE) as i32)) - .unwrap(); - let new_fsp = fsp_val - FLOAT_SIZE; - fsp.set(&mut caller, Val::I32(new_fsp as i32)).unwrap(); - // Write float to float stack - let mem = memory.data_mut(&mut caller); - mem[new_fsp as usize..new_fsp as usize + 8].copy_from_slice(&val.to_le_bytes()); - Ok(()) - }, - ); - self.register_host_primitive("F@", false, func)?; - } - - // F! ( f-addr -- ) ( F: r -- ) store a float to memory - { - let memory = self.memory; - let dsp = self.dsp; - let fsp = self.fsp; - let func = Func::new( - &mut self.store, - FuncType::new(&self.engine, [], []), - move |mut caller, _, _| { - // Read all we need first - let sp = dsp.get(&mut caller).unwrap_i32() as u32; - let fsp_val = fsp.get(&mut caller).unwrap_i32() as u32; - let (addr, float_bytes) = { - let mem = memory.data(&caller); - let addr_bytes: [u8; 4] = - mem[sp as usize..sp as usize + 4].try_into().unwrap(); - let addr = u32::from_le_bytes(addr_bytes) as usize; - let float_bytes: [u8; 8] = mem[fsp_val as usize..fsp_val as usize + 8] - .try_into() - .unwrap(); - (addr, float_bytes) - }; - // Update stack pointers - dsp.set(&mut caller, Val::I32((sp + CELL_SIZE) as i32)) - .unwrap(); - fsp.set(&mut caller, Val::I32((fsp_val + FLOAT_SIZE) as i32)) - .unwrap(); - // Store float at addr - let mem = memory.data_mut(&mut caller); - mem[addr..addr + 8].copy_from_slice(&float_bytes); - Ok(()) - }, - ); - self.register_host_primitive("F!", false, func)?; - } + self.register_primitive("F@", false, vec![IrOp::FetchFloat])?; + self.register_primitive("F!", false, vec![IrOp::StoreFloat])?; // FLOAT+ ( f-addr1 -- f-addr2 ) add float size to address self.register_primitive( @@ -7742,61 +7458,8 @@ impl ForthVM { self.register_host_primitive("F>D", false, func)?; } - // S>F ( n -- ) ( F: -- r ) convert single-cell integer to float - { - let memory = self.memory; - let dsp = self.dsp; - let fsp = self.fsp; - let func = Func::new( - &mut self.store, - FuncType::new(&self.engine, [], []), - move |mut caller, _, _| { - let sp = dsp.get(&mut caller).unwrap_i32() as u32; - let mem = memory.data(&caller); - let b: [u8; 4] = mem[sp as usize..sp as usize + 4].try_into().unwrap(); - let n = i32::from_le_bytes(b); - dsp.set(&mut caller, Val::I32((sp + CELL_SIZE) as i32)) - .unwrap(); - let f = n as f64; - let fsp_val = fsp.get(&mut caller).unwrap_i32() as u32; - let new_fsp = fsp_val - FLOAT_SIZE; - fsp.set(&mut caller, Val::I32(new_fsp as i32)).unwrap(); - let mem = memory.data_mut(&mut caller); - mem[new_fsp as usize..new_fsp as usize + 8].copy_from_slice(&f.to_le_bytes()); - Ok(()) - }, - ); - self.register_host_primitive("S>F", false, func)?; - } - - // F>S ( -- n ) ( F: r -- ) convert float to single-cell integer - { - let memory = self.memory; - let dsp = self.dsp; - let fsp = self.fsp; - let func = Func::new( - &mut self.store, - FuncType::new(&self.engine, [], []), - move |mut caller, _, _| { - let fsp_val = fsp.get(&mut caller).unwrap_i32() as u32; - let mem = memory.data(&caller); - let bytes: [u8; 8] = mem[fsp_val as usize..fsp_val as usize + 8] - .try_into() - .unwrap(); - let f = f64::from_le_bytes(bytes); - fsp.set(&mut caller, Val::I32((fsp_val + FLOAT_SIZE) as i32)) - .unwrap(); - let n = f as i32; - let sp = dsp.get(&mut caller).unwrap_i32() as u32; - let new_sp = sp - CELL_SIZE; - dsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap(); - let mem = memory.data_mut(&mut caller); - mem[new_sp as usize..new_sp as usize + 4].copy_from_slice(&n.to_le_bytes()); - Ok(()) - }, - ); - self.register_host_primitive("F>S", false, func)?; - } + self.register_primitive("S>F", false, vec![IrOp::StoF])?; + self.register_primitive("F>S", false, vec![IrOp::FtoS])?; Ok(()) } @@ -8361,27 +8024,9 @@ impl ForthVM { } /// Compile a float literal for use inside a colon definition. - /// Creates a tiny host function that pushes the given f64 onto the float stack. + /// Emits `PushF64` IR op which compiles directly to WASM f64.const + float stack push. fn compile_float_literal(&mut self, val: f64) -> anyhow::Result<()> { - let memory = self.memory; - let fsp = self.fsp; - let func = Func::new( - &mut self.store, - FuncType::new(&self.engine, [], []), - move |mut caller, _, _| { - let sp = fsp.get(&mut caller).unwrap_i32() as u32; - let new_sp = sp - FLOAT_SIZE; - if new_sp < FLOAT_STACK_BASE { - return Err(wasmtime::Error::msg("float stack overflow")); - } - fsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap(); - let mem = memory.data_mut(&mut caller); - mem[new_sp as usize..new_sp as usize + 8].copy_from_slice(&val.to_le_bytes()); - Ok(()) - }, - ); - let word_id = self.install_anon_func(func)?; - self.push_ir(IrOp::Call(word_id)); + self.push_ir(IrOp::PushF64(val)); Ok(()) } @@ -10463,4 +10108,59 @@ mod tests { assert_eq!(eval_stack(": T = ; 5 5 T"), vec![-1]); assert_eq!(eval_stack(": T < ; 3 5 T"), vec![-1]); } + + // =================================================================== + // Float IR tests + // =================================================================== + + #[test] + fn float_ir_add() { + assert_eq!(eval_output("1E 2E F+ F."), "3.000000 "); + } + + #[test] + fn float_ir_literal_in_colon() { + assert_eq!(eval_output(": T 1.5E0 2.5E0 F+ F. ; T"), "4.000000 "); + } + + #[test] + fn float_ir_conversions() { + assert_eq!(eval_stack("42 S>F F>S"), vec![42]); + } + + #[test] + fn float_ir_memory() { + assert_eq!(eval_output("FVARIABLE X 3.14E0 X F! X F@ F."), "3.140000 "); + } + + #[test] + fn float_ir_comparisons() { + assert_eq!(eval_stack("1E 2E F<"), vec![-1]); + assert_eq!(eval_stack("2E 1E F<"), vec![0]); + assert_eq!(eval_stack("3E 3E F="), vec![-1]); + assert_eq!(eval_stack("0E F0="), vec![-1]); + assert_eq!(eval_stack("1E F0="), vec![0]); + assert_eq!(eval_stack("-1E F0<"), vec![-1]); + assert_eq!(eval_stack("1E F0<"), vec![0]); + } + + #[test] + fn float_ir_stack_ops() { + assert_eq!(eval_output("1E FDUP F. F."), "1.000000 1.000000 "); + assert_eq!(eval_output("1E 2E FSWAP F. F."), "1.000000 2.000000 "); + assert_eq!( + eval_output("1E 2E FOVER F. F. F."), + "1.000000 2.000000 1.000000 " + ); + } + + #[test] + fn float_ir_arithmetic() { + assert_eq!(eval_output("10E 3E F- F."), "7.000000 "); + assert_eq!(eval_output("3E 4E F* F."), "12.000000 "); + assert_eq!(eval_output("10E 4E F/ F."), "2.500000 "); + assert_eq!(eval_output("3E FNEGATE F."), "-3.000000 "); + assert_eq!(eval_output("-7E FABS F."), "7.000000 "); + assert_eq!(eval_output("9E FSQRT F."), "3.000000 "); + } }