From b2378e34be22d734c4070cb64c87dc7a0aaf28f2 Mon Sep 17 00:00:00 2001 From: Oleksandr Kozachuk Date: Tue, 7 Apr 2026 15:53:05 +0200 Subject: [PATCH] Add SP@ IR op, replace SOURCE/DEPTH/PICK with Forth (Phase 7) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New IrOp::SpFetch pushes the current data-stack pointer value, enabling Forth-level stack introspection. This unblocks: - DEPTH: `: DEPTH 5440 SP@ - 2 RSHIFT ;` (DATA_STACK_TOP - sp) / 4 - PICK: `: PICK 1+ CELLS SP@ + @ ;` direct memory read - SOURCE: `: SOURCE 64 24 @ ;` reads INPUT_BUFFER_BASE + SYSVAR_NUM_TIB - FALIGNED, SFALIGNED, DFALIGNED: address alignment (shadowed in boot.fth) DEPTH and PICK are now compiled to native WASM — faster than the previous host-function dispatch through call_indirect + Rust closure + mutex. Removed ~109 lines of Rust. All 426 tests pass. --- crates/core/boot.fth | 30 ++++++++++ crates/core/src/codegen.rs | 13 ++++- crates/core/src/ir.rs | 2 + crates/core/src/outer.rs | 112 ++----------------------------------- 4 files changed, 48 insertions(+), 109 deletions(-) diff --git a/crates/core/boot.fth b/crates/core/boot.fth index beb6bc7..0c93b6e 100644 --- a/crates/core/boot.fth +++ b/crates/core/boot.fth @@ -2,6 +2,17 @@ \ Loaded at startup after IR primitives are compiled. \ Compiled WASM with direct calls outperforms host function dispatch. +\ --------------------------------------------------------------- +\ Foundation: stack introspection (needed by 2OVER et al.) +\ --------------------------------------------------------------- + +\ DEPTH ( -- n ) number of items on the data stack +\ DATA_STACK_TOP = 5440, uses arithmetic right shift for / 4 +: DEPTH 5440 SP@ - 2 RSHIFT ; + +\ PICK ( xn..x0 n -- xn..x0 xn ) copy nth stack item +: PICK 1+ CELLS SP@ + @ ; + \ --------------------------------------------------------------- \ Phase 1: Pure stack and memory operations \ --------------------------------------------------------------- @@ -246,3 +257,22 @@ 0 ; \ SEARCH stays as a host function (complex multi-line control flow). + +\ --------------------------------------------------------------- +\ Phase 7: More easy replacements +\ --------------------------------------------------------------- + +\ SOURCE ( -- c-addr u ) input buffer address and length +\ INPUT_BUFFER_BASE = 64, SYSVAR_NUM_TIB = 24 +: SOURCE 64 24 @ ; + +\ FALIGNED ( addr -- addr ) align to 8-byte float boundary +: FALIGNED 7 + -8 AND ; + +\ SFALIGNED ( addr -- addr ) align to 4-byte single-float boundary +: SFALIGNED 3 + -4 AND ; + +\ DFALIGNED ( addr -- addr ) align to 8-byte double-float boundary +: DFALIGNED 7 + -8 AND ; + +\ .S keeps its Rust host function (complex stack introspection). diff --git a/crates/core/src/codegen.rs b/crates/core/src/codegen.rs index df25047..a7109c6 100644 --- a/crates/core/src/codegen.rs +++ b/crates/core/src/codegen.rs @@ -715,6 +715,17 @@ fn emit_op(f: &mut Function, op: &IrOp, ctx: &EmitCtx) { dsp_reload(f); } + IrOp::SpFetch => { + // Push the current cached DSP value onto the data stack. + // Save DSP, decrement, then store the saved value at new TOS. + f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL)) + .instruction(&Instruction::LocalSet(SCRATCH_BASE)); + dsp_dec(f); + f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL)) + .instruction(&Instruction::LocalGet(SCRATCH_BASE)) + .instruction(&Instruction::I32Store(MEM4)); + } + // -- Compound operations ----------------------------------------------- IrOp::TwoDup => { // ( a b -- a b a b ) @@ -982,7 +993,7 @@ fn is_promotable(ops: &[IrOp]) -> bool { } for op in ops { match op { - IrOp::Call(_) | IrOp::TailCall(_) | IrOp::Execute => return false, + IrOp::Call(_) | IrOp::TailCall(_) | IrOp::Execute | IrOp::SpFetch => return false, IrOp::If { .. } | IrOp::DoLoop { .. } | IrOp::BeginUntil { .. } diff --git a/crates/core/src/ir.rs b/crates/core/src/ir.rs index a340740..0fd89d9 100644 --- a/crates/core/src/ir.rs +++ b/crates/core/src/ir.rs @@ -133,6 +133,8 @@ pub enum IrOp { // -- System -- /// Execute word by function table index: ( xt -- ) Execute, + /// Push the current data-stack pointer: ( -- addr ) + SpFetch, // -- Float stack manipulation -- /// Float duplicate: ( F: r -- r r ) diff --git a/crates/core/src/outer.rs b/crates/core/src/outer.rs index 6ddc8ab..7ebccf2 100644 --- a/crates/core/src/outer.rs +++ b/crates/core/src/outer.rs @@ -2089,7 +2089,7 @@ impl ForthVM { )?; // 2OVER: defined in boot.fth self.register_qdup()?; - self.register_pick()?; + // PICK: defined in boot.fth (uses SP@ IR op) self.register_min()?; self.register_max()?; // WITHIN: defined in boot.fth @@ -2100,6 +2100,7 @@ impl ForthVM { // -- Priority 6: System/compiler -- self.register_primitive("EXECUTE", false, vec![IrOp::Execute])?; + self.register_primitive("SP@", false, vec![IrOp::SpFetch])?; self.register_immediate_word()?; self.register_decimal()?; self.register_hex()?; @@ -2107,12 +2108,12 @@ impl ForthVM { self.register_tick()?; self.register_to_body()?; self.register_environment_q()?; - self.register_source()?; + // SOURCE: defined in boot.fth self.register_abort()?; // . (dot): defined in boot.fth self.register_dot_s()?; - self.register_depth()?; + // DEPTH: defined in boot.fth (uses SP@ IR op) // -- Priority 7: New core words -- self.register_count()?; @@ -2291,40 +2292,6 @@ impl ForthVM { Ok(()) } - /// Register DEPTH word. - fn register_depth(&mut self) -> anyhow::Result<()> { - let memory = self.memory; - let dsp_global = self.dsp; - - let func = Func::new( - &mut self.store, - FuncType::new(&self.engine, [], []), - move |mut caller, _params, _results| { - let sp = dsp_global.get(&mut caller).unwrap_i32() as u32; - let depth = if sp <= DATA_STACK_TOP { - ((DATA_STACK_TOP - sp) / CELL_SIZE) as i32 - } else { - // Stack pointer has gone below the base -- treat as empty - 0 - }; - // Push depth onto stack - let mem_len = memory.data(&caller).len() as u32; - let new_sp = sp.wrapping_sub(CELL_SIZE); - if new_sp < crate::memory::DATA_STACK_BASE || new_sp >= mem_len { - return Err(wasmtime::Error::msg("data stack overflow")); - } - let data = memory.data_mut(&mut caller); - let bytes = depth.to_le_bytes(); - data[new_sp as usize..new_sp as usize + 4].copy_from_slice(&bytes); - dsp_global.set(&mut caller, Val::I32(new_sp as i32))?; - Ok(()) - }, - ); - - self.register_host_primitive("DEPTH", false, func)?; - Ok(()) - } - // ----------------------------------------------------------------------- // Priority 1: Loop support host functions // ----------------------------------------------------------------------- @@ -3107,35 +3074,6 @@ impl ForthVM { } /// PICK -- ( xn ... x0 n -- xn ... x0 xn ) pick nth item. - fn register_pick(&mut self) -> anyhow::Result<()> { - let memory = self.memory; - let dsp = self.dsp; - - let func = Func::new( - &mut self.store, - FuncType::new(&self.engine, [], []), - move |mut caller, _params, _results| { - let sp = dsp.get(&mut caller).unwrap_i32() as u32; - let data = memory.data(&caller); - // Read n from TOS - let b: [u8; 4] = data[sp as usize..sp as usize + 4].try_into().unwrap(); - let n = i32::from_le_bytes(b) as u32; - // Read the nth item below TOS: at sp + (n+1)*CELL_SIZE - let pick_addr = (sp + (n + 1) * CELL_SIZE) as usize; - let b: [u8; 4] = data[pick_addr..pick_addr + 4].try_into().unwrap(); - let value = i32::from_le_bytes(b); - // Replace TOS with picked value - let data = memory.data_mut(&mut caller); - let bytes = value.to_le_bytes(); - data[sp as usize..sp as usize + 4].copy_from_slice(&bytes); - Ok(()) - }, - ); - - self.register_host_primitive("PICK", false, func)?; - Ok(()) - } - /// MIN -- ( a b -- min ) fn register_min(&mut self) -> anyhow::Result<()> { // 2DUP > IF SWAP THEN DROP @@ -3326,48 +3264,6 @@ impl ForthVM { Ok(()) } - /// SOURCE -- ( -- c-addr u ) push address and length of input buffer. - fn register_source(&mut self) -> anyhow::Result<()> { - let memory = self.memory; - let dsp = self.dsp; - - let func = Func::new( - &mut self.store, - FuncType::new(&self.engine, [], []), - move |mut caller, _params, _results| { - // The input buffer is synced to WASM memory at INPUT_BUFFER_BASE. - // The length is stored at a known location. We read it from the - // first 4 bytes before the buffer, or we use a different approach: - // read the actual length from a sysvar. - // For simplicity, read the buffer length from SYSVAR_NUM_TIB. - let data = memory.data(&caller); - let b: [u8; 4] = data[SYSVAR_NUM_TIB as usize..SYSVAR_NUM_TIB as usize + 4] - .try_into() - .unwrap(); - let len = i32::from_le_bytes(b); - - let sp = dsp.get(&mut caller).unwrap_i32() as u32; - let mem_len = memory.data(&caller).len() as u32; - // Bounds check for stack underflow/corruption - if sp < 8 || sp > mem_len { - return Err(wasmtime::Error::msg("data stack overflow in SOURCE")); - } - let new_sp = sp - 8; - let data = memory.data_mut(&mut caller); - // c-addr (deeper) - data[(new_sp + 4) as usize..(new_sp + 8) as usize] - .copy_from_slice(&(INPUT_BUFFER_BASE as i32).to_le_bytes()); - // u (on top) - data[new_sp as usize..new_sp as usize + 4].copy_from_slice(&len.to_le_bytes()); - dsp.set(&mut caller, Val::I32(new_sp as i32))?; - Ok(()) - }, - ); - - self.register_host_primitive("SOURCE", false, func)?; - Ok(()) - } - /// ABORT -- clear stacks and throw error. fn register_abort(&mut self) -> anyhow::Result<()> { let dsp = self.dsp;