From eb79c40c69f98c2d3699fabe0706aeb77655cf66 Mon Sep 17 00:00:00 2001 From: Oleksandr Kozachuk Date: Wed, 1 Apr 2026 20:38:48 +0200 Subject: [PATCH] Implement complete Floating-Point word set, 70+ float words Separate float stack with fsp global, IEEE 754 double precision. Stack ops: FDROP FDUP FSWAP FOVER FROT FDEPTH Arithmetic: F+ F- F* F/ FNEGATE FABS FMAX FMIN FSQRT FLOOR FROUND F** Comparisons: F0= F0< F= F< F~ Memory: F@ F! SF@ SF! DF@ DF! FLOAT+ FLOATS FALIGNED FALIGN Conversions: D>F F>D S>F F>S Trig: FSIN FCOS FTAN FASIN FACOS FATAN FATAN2 FSINCOS Exp/Log: FEXP FEXPM1 FLN FLNP1 FLOG FALOG Hyperbolic: FSINH FCOSH FTANH FASINH FACOSH FATANH I/O: F. FE. FS. REPRESENT >FLOAT PRECISION SET-PRECISION Defining: FVARIABLE FCONSTANT FVALUE FLITERAL Float literal parsing (1E, 1.5E2, -3.14E0 format) 299 unit tests + 11 compliance tests, 0 errors on float test suite --- crates/core/src/codegen.rs | 23 +- crates/core/src/dictionary.rs | 6 + crates/core/src/outer.rs | 1971 ++++++++++++++++++++++++++++++++- docs/APPLICATIONS.md | 890 +++++++++++++++ 4 files changed, 2870 insertions(+), 20 deletions(-) create mode 100644 docs/APPLICATIONS.md diff --git a/crates/core/src/codegen.rs b/crates/core/src/codegen.rs index b8ecc31..c389c42 100644 --- a/crates/core/src/codegen.rs +++ b/crates/core/src/codegen.rs @@ -29,6 +29,10 @@ const DSP: u32 = 0; /// Index of the `$rsp` global (return stack pointer). const RSP: u32 = 1; +/// Index of the `$fsp` global (float stack pointer). +#[allow(dead_code)] +const FSP: u32 = 2; + /// Index of the imported function table. 
const TABLE: u32 = 0; @@ -795,6 +799,15 @@ pub fn compile_word( shared: false, }), ); + imports.import( + "env", + "fsp", + EntityType::Global(GlobalType { + val_type: ValType::I32, + mutable: true, + shared: false, + }), + ); imports.import( "env", "table", @@ -871,7 +884,7 @@ mod tests { use super::*; use crate::dictionary::WordId; use crate::ir::IrOp; - use crate::memory::{DATA_STACK_TOP, RETURN_STACK_TOP}; + use crate::memory::{DATA_STACK_TOP, FLOAT_STACK_TOP, RETURN_STACK_TOP}; fn default_config() -> CodegenConfig { CodegenConfig { @@ -1133,6 +1146,13 @@ mod tests { ) .unwrap(); + let fsp = Global::new( + &mut store, + wasmtime::GlobalType::new(ValType::I32, Mutability::Var), + Val::I32(FLOAT_STACK_TOP as i32), + ) + .unwrap(); + let table = Table::new( &mut store, wasmtime::TableType::new(RefType::FUNCREF, 16, None), @@ -1152,6 +1172,7 @@ mod tests { memory.into(), dsp.into(), rsp.into(), + fsp.into(), table.into(), ], ) diff --git a/crates/core/src/dictionary.rs b/crates/core/src/dictionary.rs index d9419fc..e9a3091 100644 --- a/crates/core/src/dictionary.rs +++ b/crates/core/src/dictionary.rs @@ -111,6 +111,12 @@ impl Dictionary { Ok(WordId(fn_index)) } + /// Reserve a function index without creating a dictionary entry. + /// Used for anonymous host functions (e.g., float literals during compilation). + pub fn reserve_fn_index(&mut self) { + self.next_fn_index += 1; + } + /// Reveal the most recent word (remove HIDDEN flag). /// Called after `: ... ;` completes compilation. 
pub fn reveal(&mut self) { diff --git a/crates/core/src/outer.rs b/crates/core/src/outer.rs index b7c78e0..397f665 100644 --- a/crates/core/src/outer.rs +++ b/crates/core/src/outer.rs @@ -20,8 +20,9 @@ use crate::codegen::{CodegenConfig, CompiledModule, compile_word}; use crate::dictionary::{Dictionary, WordId}; use crate::ir::IrOp; use crate::memory::{ - CELL_SIZE, DATA_STACK_TOP, INPUT_BUFFER_BASE, INPUT_BUFFER_SIZE, RETURN_STACK_TOP, - SYSVAR_BASE_VAR, SYSVAR_NUM_TIB, SYSVAR_STATE, SYSVAR_TO_IN, + CELL_SIZE, DATA_STACK_TOP, FLOAT_SIZE, FLOAT_STACK_BASE, FLOAT_STACK_TOP, INPUT_BUFFER_BASE, + INPUT_BUFFER_SIZE, RETURN_STACK_TOP, SYSVAR_BASE_VAR, SYSVAR_NUM_TIB, SYSVAR_STATE, + SYSVAR_TO_IN, }; // --------------------------------------------------------------------------- @@ -172,6 +173,7 @@ pub struct ForthVM { table: Table, dsp: Global, rsp: Global, + fsp: Global, /// 0 = interpreting, -1 = compiling state: i32, /// Number base (default 10) @@ -223,6 +225,10 @@ pub struct ForthVM { word_lookup: Arc>>, // Set of word_ids that are 2VALUEs (need 2-cell TO semantics) two_value_words: std::collections::HashSet, + // Set of word_ids that are FVALUEs (need float TO semantics) + fvalue_words: std::collections::HashSet, + // Float I/O precision (default 6) + float_precision: Arc>, } impl ForthVM { @@ -253,6 +259,13 @@ impl ForthVM { Val::I32(RETURN_STACK_TOP as i32), )?; + // Float stack pointer global + let fsp = Global::new( + &mut store, + wasmtime::GlobalType::new(ValType::I32, Mutability::Var), + Val::I32(FLOAT_STACK_TOP as i32), + )?; + // Function table (initial 256 entries) let table = Table::new( &mut store, @@ -297,6 +310,7 @@ impl ForthVM { table, dsp, rsp, + fsp, state: 0, base: 10, input_buffer: String::new(), @@ -324,6 +338,8 @@ impl ForthVM { throw_code: Arc::new(Mutex::new(None)), word_lookup: Arc::new(Mutex::new(HashMap::new())), two_value_words: std::collections::HashSet::new(), + fvalue_words: std::collections::HashSet::new(), + float_precision: 
Arc::new(Mutex::new(6)), }; vm.register_primitives()?; @@ -613,6 +629,9 @@ impl ForthVM { "2CONSTANT" => return self.define_2constant(), "2VARIABLE" => return self.define_2variable(), "2VALUE" => return self.define_2value(), + "FVARIABLE" => return self.define_fvariable(), + "FCONSTANT" => return self.define_fconstant(), + "FVALUE" => return self.define_fvalue(), _ => {} } @@ -639,6 +658,12 @@ impl ForthVM { return Ok(()); } + // Try to parse as float literal (contains 'E' or 'e') + if let Some(f) = self.parse_float_literal(token) { + self.fpush(f)?; + return Ok(()); + } + anyhow::bail!("unknown word: {token}"); } @@ -786,6 +811,12 @@ impl ForthVM { } return Ok(()); } + "FLITERAL" => { + // compile-time: pop from float stack, compile as float literal + let f = self.fpop()?; + self.compile_float_literal(f)?; + return Ok(()); + } "SLITERAL" => { // compile-time: pop (c-addr u) from data stack, copy string, // compile code to push the new (c-addr u) @@ -936,6 +967,12 @@ impl ForthVM { return Ok(()); } + // Try to parse as float literal -- compile as FLITERAL + if let Some(f) = self.parse_float_literal(token) { + self.compile_float_literal(f)?; + return Ok(()); + } + anyhow::bail!("unknown word: {token}"); } @@ -1464,6 +1501,7 @@ impl ForthVM { self.memory.into(), self.dsp.into(), self.rsp.into(), + self.fsp.into(), self.table.into(), ], )?; @@ -1540,6 +1578,50 @@ impl ForthVM { Ok(value) } + // ----------------------------------------------------------------------- + // Float stack operations + // ----------------------------------------------------------------------- + + /// Push a value onto the float stack. 
+ fn fpush(&mut self, val: f64) -> anyhow::Result<()> { + let sp = self.fsp.get(&mut self.store).unwrap_i32() as u32; + let new_sp = sp - FLOAT_SIZE; + if new_sp < FLOAT_STACK_BASE { + anyhow::bail!("float stack overflow"); + } + self.fsp.set(&mut self.store, Val::I32(new_sp as i32))?; + let mem = self.memory.data_mut(&mut self.store); + mem[new_sp as usize..new_sp as usize + 8].copy_from_slice(&val.to_le_bytes()); + Ok(()) + } + + /// Pop a value from the float stack. + fn fpop(&mut self) -> anyhow::Result { + let sp = self.fsp.get(&mut self.store).unwrap_i32() as u32; + if sp >= FLOAT_STACK_TOP { + anyhow::bail!("float stack underflow"); + } + let mem = self.memory.data(&self.store); + let bytes: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); + self.fsp.set(&mut self.store, Val::I32((sp + 8) as i32))?; + Ok(f64::from_le_bytes(bytes)) + } + + /// Read the current float stack contents (top-first). + #[cfg(test)] + fn float_stack(&mut self) -> Vec { + let sp = self.fsp.get(&mut self.store).unwrap_i32() as u32; + let data = self.memory.data(&self.store); + let mut stack = Vec::new(); + let mut addr = sp; + while addr < FLOAT_STACK_TOP { + let b: [u8; 8] = data[addr as usize..addr as usize + 8].try_into().unwrap(); + stack.push(f64::from_le_bytes(b)); + addr += FLOAT_SIZE; + } + stack + } + // ----------------------------------------------------------------------- // Number parsing // ----------------------------------------------------------------------- @@ -1620,6 +1702,37 @@ impl ForthVM { }) } + // ----------------------------------------------------------------------- + // Float literal parsing + // ----------------------------------------------------------------------- + + /// Try to parse a token as a floating-point literal (Forth 2012 format). + /// Forth float literals contain 'E' or 'e', e.g. `1E`, `1.5E0`, `-3.14E2`, `1E-3`. 
+ #[allow(clippy::unused_self)] + fn parse_float_literal(&self, token: &str) -> Option { + if token.is_empty() { + return None; + } + let upper = token.to_ascii_uppercase(); + // Must contain 'E' or 'D' (Forth sometimes uses D for double-float exponent) + if !upper.contains('E') && !upper.contains('D') { + return None; + } + // Replace D with E for Rust parsing + let normalized = upper.replace('D', "E"); + // Forth allows trailing E without exponent: "1E" means "1E0" + // Also "1E+" or "1E-" mean "1E+0" and "1E-0" + let s = if normalized.ends_with('E') + || normalized.ends_with("E+") + || normalized.ends_with("E-") + { + format!("{normalized}0") + } else { + normalized + }; + s.parse::().ok() + } + // ----------------------------------------------------------------------- // Push IR to the active body // ----------------------------------------------------------------------- @@ -1967,6 +2080,9 @@ impl ForthVM { self.register_blank()?; self.register_minus_trailing()?; + // -- Floating-Point word set -- + self.register_float_words()?; + Ok(()) } @@ -2412,7 +2528,12 @@ impl ForthVM { if let Some((_addr, word_id, _imm)) = self.dictionary.find(&name) { if let Some(&pfa) = self.word_pfa_map.get(&word_id.0) { - if self.two_value_words.contains(&word_id.0) { + if self.fvalue_words.contains(&word_id.0) { + // FVALUE: pop from float stack, store 8 bytes + let value = self.fpop()?; + let data = self.memory.data_mut(&mut self.store); + data[pfa as usize..pfa as usize + 8].copy_from_slice(&value.to_le_bytes()); + } else if self.two_value_words.contains(&word_id.0) { // 2VALUE: pop two cells let hi = self.pop_data_stack()?; let lo = self.pop_data_stack()?; @@ -2482,23 +2603,13 @@ impl ForthVM { if let Some((_addr, word_id, _imm)) = self.dictionary.find(&name) { if let Some(&pfa) = self.word_pfa_map.get(&word_id.0) { - if self.two_value_words.contains(&word_id.0) { + if self.fvalue_words.contains(&word_id.0) { + // FVALUE: compile a call to a host function that pops + // from the 
float stack and stores at pfa + let store_word = self.make_fvalue_store(pfa)?; + self.push_ir(IrOp::Call(store_word)); + } else if self.two_value_words.contains(&word_id.0) { // 2VALUE: ( x1 x2 -- ) store two cells - // Stack: x2 on top, x1 below. Store x1 at pfa, x2 at pfa+4 - // Compile: swap over swap pfa ! pfa+4 ! - // Actually: ( x1 x2 -- ) we want x1 at pfa, x2 at pfa+4 - // The top is x2, below is x1 - // SWAP gives us x2 x1, then PFA ! gives x1 at pfa (pops x1) - // Then PFA+4 ! gives x2 at pfa+4 - // Wait: stack is ( x1 x2 -- ). x2 is TOS. - // We want: x1 at [pfa], x2 at [pfa+4] - // PFA+4 SWAP ROT (? no) - // Simply: SWAP PFA ! PFA+4 ! - // But SWAP makes it (x2 x1). PFA ! stores x1, leaves x2. PFA+4 ! stores x2. - // Wait, ! pops (val addr). So we need addr on top. - // ( x1 x2 ) -> we need ( x1 pfa ) to store, then ( x2 pfa+4 ) - // So: PFA+4 SWAP PFA+4 ! PFA ! -- no - // Let's just do it with explicit IR: self.push_ir(IrOp::PushI32((pfa + 4) as i32)); self.push_ir(IrOp::Store); // stores x2 at pfa+4 self.push_ir(IrOp::PushI32(pfa as i32)); @@ -6776,6 +6887,1584 @@ impl ForthVM { self.register_host_primitive("-TRAILING", false, func)?; Ok(()) } + + // ----------------------------------------------------------------------- + // Floating-Point word set + // ----------------------------------------------------------------------- + + /// Register all floating-point words. + fn register_float_words(&mut self) -> anyhow::Result<()> { + self.register_float_stack_ops()?; + self.register_float_arithmetic()?; + self.register_float_comparisons()?; + self.register_float_memory()?; + self.register_float_conversions()?; + self.register_float_trig()?; + self.register_float_exp_log()?; + self.register_float_hyperbolic()?; + self.register_float_io()?; + self.register_float_misc()?; + Ok(()) + } + + /// Helper: create a host function that takes no data-stack args + /// and operates on the float stack via fsp/memory closures. 
+ /// Pattern for unary float ops: pop one float, compute, push result. + fn register_float_unary(&mut self, name: &str, op: fn(f64) -> f64) -> anyhow::Result<()> { + let memory = self.memory; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = fsp.get(&mut caller).unwrap_i32() as u32; + if sp >= FLOAT_STACK_TOP { + return Err(wasmtime::Error::msg("float stack underflow")); + } + let mem = memory.data(&caller); + let bytes: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); + let a = f64::from_le_bytes(bytes); + let result = op(a); + let mem = memory.data_mut(&mut caller); + mem[sp as usize..sp as usize + 8].copy_from_slice(&result.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive(name, false, func)?; + Ok(()) + } + + /// Pattern for binary float ops: pop two floats (b then a), compute, push result. + fn register_float_binary(&mut self, name: &str, op: fn(f64, f64) -> f64) -> anyhow::Result<()> { + let memory = self.memory; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = fsp.get(&mut caller).unwrap_i32() as u32; + if sp + 8 >= FLOAT_STACK_TOP { + return Err(wasmtime::Error::msg("float stack underflow")); + } + let mem = memory.data(&caller); + let b_bytes: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); + let a_bytes: [u8; 8] = mem[sp as usize + 8..sp as usize + 16].try_into().unwrap(); + let b = f64::from_le_bytes(b_bytes); + let a = f64::from_le_bytes(a_bytes); + let result = op(a, b); + let new_sp = sp + 8; + fsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap(); + let mem = memory.data_mut(&mut caller); + mem[new_sp as usize..new_sp as usize + 8].copy_from_slice(&result.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive(name, false, func)?; + Ok(()) + } + + /// Float stack manipulation words. 
+ fn register_float_stack_ops(&mut self) -> anyhow::Result<()> { + // FDROP ( F: r -- ) + { + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = fsp.get(&mut caller).unwrap_i32() as u32; + if sp >= FLOAT_STACK_TOP { + return Err(wasmtime::Error::msg("float stack underflow")); + } + fsp.set(&mut caller, Val::I32((sp + 8) as i32)).unwrap(); + Ok(()) + }, + ); + self.register_host_primitive("FDROP", false, func)?; + } + + // FDUP ( F: r -- r r ) + { + let memory = self.memory; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = fsp.get(&mut caller).unwrap_i32() as u32; + if sp >= FLOAT_STACK_TOP { + return Err(wasmtime::Error::msg("float stack underflow")); + } + let new_sp = sp - 8; + if new_sp < FLOAT_STACK_BASE { + return Err(wasmtime::Error::msg("float stack overflow")); + } + let mem = memory.data(&caller); + let bytes: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); + fsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap(); + let mem = memory.data_mut(&mut caller); + mem[new_sp as usize..new_sp as usize + 8].copy_from_slice(&bytes); + Ok(()) + }, + ); + self.register_host_primitive("FDUP", false, func)?; + } + + // FSWAP ( F: r1 r2 -- r2 r1 ) + { + let memory = self.memory; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = fsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let b: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); + let a: [u8; 8] = mem[sp as usize + 8..sp as usize + 16].try_into().unwrap(); + let mem = memory.data_mut(&mut caller); + mem[sp as usize..sp as usize + 8].copy_from_slice(&a); + mem[sp as usize + 8..sp as usize + 16].copy_from_slice(&b); + Ok(()) + }, + ); + self.register_host_primitive("FSWAP", false, func)?; 
+ } + + // FOVER ( F: r1 r2 -- r1 r2 r1 ) + { + let memory = self.memory; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = fsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let a: [u8; 8] = mem[sp as usize + 8..sp as usize + 16].try_into().unwrap(); + let new_sp = sp - 8; + if new_sp < FLOAT_STACK_BASE { + return Err(wasmtime::Error::msg("float stack overflow")); + } + fsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap(); + let mem = memory.data_mut(&mut caller); + mem[new_sp as usize..new_sp as usize + 8].copy_from_slice(&a); + Ok(()) + }, + ); + self.register_host_primitive("FOVER", false, func)?; + } + + // FROT ( F: r1 r2 r3 -- r2 r3 r1 ) + { + let memory = self.memory; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = fsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let c: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); + let b: [u8; 8] = mem[sp as usize + 8..sp as usize + 16].try_into().unwrap(); + let a: [u8; 8] = mem[sp as usize + 16..sp as usize + 24].try_into().unwrap(); + let mem = memory.data_mut(&mut caller); + mem[sp as usize..sp as usize + 8].copy_from_slice(&a); + mem[sp as usize + 8..sp as usize + 16].copy_from_slice(&c); + mem[sp as usize + 16..sp as usize + 24].copy_from_slice(&b); + Ok(()) + }, + ); + self.register_host_primitive("FROT", false, func)?; + } + + // FDEPTH ( -- +n ) number of floats on the float stack, pushed onto DATA stack + { + let memory = self.memory; + let dsp = self.dsp; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let fsp_val = fsp.get(&mut caller).unwrap_i32() as u32; + let depth = if fsp_val <= FLOAT_STACK_TOP { + ((FLOAT_STACK_TOP - fsp_val) / FLOAT_SIZE) as i32 + } else { + 
0 + }; + // Push onto data stack + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let new_sp = sp - CELL_SIZE; + dsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap(); + let mem = memory.data_mut(&mut caller); + mem[new_sp as usize..new_sp as usize + 4].copy_from_slice(&depth.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("FDEPTH", false, func)?; + } + + // FNIP ( F: r1 r2 -- r2 ) + { + let memory = self.memory; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = fsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let top: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); + let new_sp = sp + 8; + fsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap(); + let mem = memory.data_mut(&mut caller); + mem[new_sp as usize..new_sp as usize + 8].copy_from_slice(&top); + Ok(()) + }, + ); + self.register_host_primitive("FNIP", false, func)?; + } + + // FTUCK ( F: r1 r2 -- r2 r1 r2 ) + { + let memory = self.memory; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = fsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let r2: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); + let r1: [u8; 8] = mem[sp as usize + 8..sp as usize + 16].try_into().unwrap(); + let new_sp = sp - 8; + if new_sp < FLOAT_STACK_BASE { + return Err(wasmtime::Error::msg("float stack overflow")); + } + fsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap(); + let mem = memory.data_mut(&mut caller); + // r2 r1 r2 (bottom to top) + mem[new_sp as usize..new_sp as usize + 8].copy_from_slice(&r2); + mem[new_sp as usize + 8..new_sp as usize + 16].copy_from_slice(&r1); + mem[new_sp as usize + 16..new_sp as usize + 24].copy_from_slice(&r2); + Ok(()) + }, + ); + self.register_host_primitive("FTUCK", false, func)?; + } + + Ok(()) + } 
+ + /// Float arithmetic words. + fn register_float_arithmetic(&mut self) -> anyhow::Result<()> { + self.register_float_binary("F+", |a, b| a + b)?; + self.register_float_binary("F-", |a, b| a - b)?; + self.register_float_binary("F*", |a, b| a * b)?; + self.register_float_binary("F/", |a, b| a / b)?; + self.register_float_unary("FNEGATE", |a| -a)?; + self.register_float_unary("FABS", f64::abs)?; + self.register_float_binary("FMAX", f64::max)?; + self.register_float_binary("FMIN", f64::min)?; + self.register_float_unary("FSQRT", f64::sqrt)?; + self.register_float_unary("FLOOR", f64::floor)?; + self.register_float_unary("FROUND", f64::round_ties_even)?; + self.register_float_binary("F**", f64::powf)?; + Ok(()) + } + + /// Float comparison words. Results go on the DATA stack. + fn register_float_comparisons(&mut self) -> anyhow::Result<()> { + // F0= ( -- flag ) ( F: r -- ) + { + let memory = self.memory; + let dsp = self.dsp; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = fsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let bytes: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); + let val = f64::from_le_bytes(bytes); + fsp.set(&mut caller, Val::I32((sp + 8) as i32)).unwrap(); + let flag: i32 = if val == 0.0 { -1 } else { 0 }; + let dsp_val = dsp.get(&mut caller).unwrap_i32() as u32; + let new_dsp = dsp_val - CELL_SIZE; + dsp.set(&mut caller, Val::I32(new_dsp as i32)).unwrap(); + let mem = memory.data_mut(&mut caller); + mem[new_dsp as usize..new_dsp as usize + 4] + .copy_from_slice(&flag.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("F0=", false, func)?; + } + + // F0< ( -- flag ) ( F: r -- ) + { + let memory = self.memory; + let dsp = self.dsp; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = fsp.get(&mut 
caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let bytes: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); + let val = f64::from_le_bytes(bytes); + fsp.set(&mut caller, Val::I32((sp + 8) as i32)).unwrap(); + let flag: i32 = if val < 0.0 { -1 } else { 0 }; + let dsp_val = dsp.get(&mut caller).unwrap_i32() as u32; + let new_dsp = dsp_val - CELL_SIZE; + dsp.set(&mut caller, Val::I32(new_dsp as i32)).unwrap(); + let mem = memory.data_mut(&mut caller); + mem[new_dsp as usize..new_dsp as usize + 4] + .copy_from_slice(&flag.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("F0<", false, func)?; + } + + // Helper for binary float comparisons that pop two floats and push a flag + let register_float_cmp = + |vm: &mut Self, name: &str, cmp: fn(f64, f64) -> bool| -> anyhow::Result<()> { + let memory = vm.memory; + let dsp = vm.dsp; + let fsp = vm.fsp; + let func = Func::new( + &mut vm.store, + FuncType::new(&vm.engine, [], []), + move |mut caller, _, _| { + let sp = fsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let b_bytes: [u8; 8] = + mem[sp as usize..sp as usize + 8].try_into().unwrap(); + let a_bytes: [u8; 8] = + mem[sp as usize + 8..sp as usize + 16].try_into().unwrap(); + let b = f64::from_le_bytes(b_bytes); + let a = f64::from_le_bytes(a_bytes); + fsp.set(&mut caller, Val::I32((sp + 16) as i32)).unwrap(); + let flag: i32 = if cmp(a, b) { -1 } else { 0 }; + let dsp_val = dsp.get(&mut caller).unwrap_i32() as u32; + let new_dsp = dsp_val - CELL_SIZE; + dsp.set(&mut caller, Val::I32(new_dsp as i32)).unwrap(); + let mem = memory.data_mut(&mut caller); + mem[new_dsp as usize..new_dsp as usize + 4] + .copy_from_slice(&flag.to_le_bytes()); + Ok(()) + }, + ); + vm.register_host_primitive(name, false, func)?; + Ok(()) + }; + + register_float_cmp(self, "F=", |a, b| a == b)?; + register_float_cmp(self, "F<", |a, b| a < b)?; + + // F~ ( -- flag ) ( F: r1 r2 r3 -- ) approximate float comparison + // If 
r3 > 0: true if |r1-r2| < r3 + // If r3 = 0: true if r1 and r2 are exactly equal (bitwise) + // If r3 < 0: true if |r1-r2| < |r3|*(|r1|+|r2|) + { + let memory = self.memory; + let dsp = self.dsp; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = fsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let r3_bytes: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); + let r2_bytes: [u8; 8] = + mem[sp as usize + 8..sp as usize + 16].try_into().unwrap(); + let r1_bytes: [u8; 8] = + mem[sp as usize + 16..sp as usize + 24].try_into().unwrap(); + let r3 = f64::from_le_bytes(r3_bytes); + let r2 = f64::from_le_bytes(r2_bytes); + let r1 = f64::from_le_bytes(r1_bytes); + fsp.set(&mut caller, Val::I32((sp + 24) as i32)).unwrap(); + + let result = if r3 > 0.0 { + (r1 - r2).abs() < r3 + } else if r3 == 0.0 { + r1.to_bits() == r2.to_bits() + } else { + // r3 < 0: relative comparison + (r1 - r2).abs() < r3.abs() * (r1.abs() + r2.abs()) + }; + + let flag: i32 = if result { -1 } else { 0 }; + let dsp_val = dsp.get(&mut caller).unwrap_i32() as u32; + let new_dsp = dsp_val - CELL_SIZE; + dsp.set(&mut caller, Val::I32(new_dsp as i32)).unwrap(); + let mem = memory.data_mut(&mut caller); + mem[new_dsp as usize..new_dsp as usize + 4] + .copy_from_slice(&flag.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("F~", false, func)?; + } + + Ok(()) + } + + /// Float memory words. 
+ fn register_float_memory(&mut self) -> anyhow::Result<()> { + // F@ ( f-addr -- ) ( F: -- r ) fetch a float from memory + { + let memory = self.memory; + let dsp = self.dsp; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + // Read all we need from memory first + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let fsp_val = fsp.get(&mut caller).unwrap_i32() as u32; + let (addr, val) = { + let mem = memory.data(&caller); + let addr_bytes: [u8; 4] = + mem[sp as usize..sp as usize + 4].try_into().unwrap(); + let addr = u32::from_le_bytes(addr_bytes) as usize; + let float_bytes: [u8; 8] = mem[addr..addr + 8].try_into().unwrap(); + (addr, f64::from_le_bytes(float_bytes)) + }; + let _ = addr; + // Update stack pointers + dsp.set(&mut caller, Val::I32((sp + CELL_SIZE) as i32)) + .unwrap(); + let new_fsp = fsp_val - FLOAT_SIZE; + fsp.set(&mut caller, Val::I32(new_fsp as i32)).unwrap(); + // Write float to float stack + let mem = memory.data_mut(&mut caller); + mem[new_fsp as usize..new_fsp as usize + 8].copy_from_slice(&val.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("F@", false, func)?; + } + + // F! 
( f-addr -- ) ( F: r -- ) store a float to memory + { + let memory = self.memory; + let dsp = self.dsp; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + // Read all we need first + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let fsp_val = fsp.get(&mut caller).unwrap_i32() as u32; + let (addr, float_bytes) = { + let mem = memory.data(&caller); + let addr_bytes: [u8; 4] = + mem[sp as usize..sp as usize + 4].try_into().unwrap(); + let addr = u32::from_le_bytes(addr_bytes) as usize; + let float_bytes: [u8; 8] = mem[fsp_val as usize..fsp_val as usize + 8] + .try_into() + .unwrap(); + (addr, float_bytes) + }; + // Update stack pointers + dsp.set(&mut caller, Val::I32((sp + CELL_SIZE) as i32)) + .unwrap(); + fsp.set(&mut caller, Val::I32((fsp_val + FLOAT_SIZE) as i32)) + .unwrap(); + // Store float at addr + let mem = memory.data_mut(&mut caller); + mem[addr..addr + 8].copy_from_slice(&float_bytes); + Ok(()) + }, + ); + self.register_host_primitive("F!", false, func)?; + } + + // FLOAT+ ( f-addr1 -- f-addr2 ) add float size to address + self.register_primitive( + "FLOAT+", + false, + vec![IrOp::PushI32(FLOAT_SIZE as i32), IrOp::Add], + )?; + + // FLOATS ( n1 -- n2 ) multiply by float size + self.register_primitive( + "FLOATS", + false, + vec![IrOp::PushI32(FLOAT_SIZE as i32), IrOp::Mul], + )?; + + // FALIGNED ( addr -- f-addr ) align to float boundary (8 bytes) + { + let memory = self.memory; + let dsp = self.dsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let b: [u8; 4] = mem[sp as usize..sp as usize + 4].try_into().unwrap(); + let addr = u32::from_le_bytes(b); + let aligned = (addr + 7) & !7; + let mem = memory.data_mut(&mut caller); + mem[sp as usize..sp as usize + 4].copy_from_slice(&aligned.to_le_bytes()); + Ok(()) + }, + 
); + self.register_host_primitive("FALIGNED", false, func)?; + } + + // FALIGN ( -- ) align HERE to float boundary + { + let memory = self.memory; + let here_cell = self.here_cell.clone(); + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let here_val = if let Some(ref cell) = here_cell { + *cell.lock().unwrap() + } else { + let mem = memory.data(&caller); + let b: [u8; 4] = mem[crate::memory::SYSVAR_HERE as usize + ..crate::memory::SYSVAR_HERE as usize + 4] + .try_into() + .unwrap(); + u32::from_le_bytes(b) + }; + let aligned = (here_val + 7) & !7; + if let Some(ref cell) = here_cell { + *cell.lock().unwrap() = aligned; + } + let mem = memory.data_mut(&mut caller); + mem[crate::memory::SYSVAR_HERE as usize + ..crate::memory::SYSVAR_HERE as usize + 4] + .copy_from_slice(&aligned.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("FALIGN", false, func)?; + } + + // SFLOATS ( n -- n*sfloat_size ) single-float size (same as FLOATS for us) + self.register_primitive( + "SFLOATS", + false, + vec![IrOp::PushI32(FLOAT_SIZE as i32), IrOp::Mul], + )?; + + // SFLOAT+ ( addr -- addr+sfloat_size ) + self.register_primitive( + "SFLOAT+", + false, + vec![IrOp::PushI32(FLOAT_SIZE as i32), IrOp::Add], + )?; + + // DFLOATS ( n -- n*dfloat_size ) + self.register_primitive( + "DFLOATS", + false, + vec![IrOp::PushI32(FLOAT_SIZE as i32), IrOp::Mul], + )?; + + // DFLOAT+ ( addr -- addr+dfloat_size ) + self.register_primitive( + "DFLOAT+", + false, + vec![IrOp::PushI32(FLOAT_SIZE as i32), IrOp::Add], + )?; + + Ok(()) + } + + /// Float conversion words. 
+ fn register_float_conversions(&mut self) -> anyhow::Result<()> { + // D>F ( d -- ) ( F: -- r ) convert double-cell integer to float + { + let memory = self.memory; + let dsp = self.dsp; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + // Double-cell: hi on top, lo below + let hi_bytes: [u8; 4] = mem[sp as usize..sp as usize + 4].try_into().unwrap(); + let lo_bytes: [u8; 4] = + mem[sp as usize + 4..sp as usize + 8].try_into().unwrap(); + let hi = i32::from_le_bytes(hi_bytes); + let lo = i32::from_le_bytes(lo_bytes); + let d = ((hi as i64) << 32) | (lo as u32 as i64); + let f = d as f64; + // Pop two cells from data stack + dsp.set(&mut caller, Val::I32((sp + 8) as i32)).unwrap(); + // Push onto float stack + let fsp_val = fsp.get(&mut caller).unwrap_i32() as u32; + let new_fsp = fsp_val - FLOAT_SIZE; + fsp.set(&mut caller, Val::I32(new_fsp as i32)).unwrap(); + let mem = memory.data_mut(&mut caller); + mem[new_fsp as usize..new_fsp as usize + 8].copy_from_slice(&f.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("D>F", false, func)?; + } + + // F>D ( -- d ) ( F: r -- ) convert float to double-cell integer + { + let memory = self.memory; + let dsp = self.dsp; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + // Pop from float stack + let fsp_val = fsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let bytes: [u8; 8] = mem[fsp_val as usize..fsp_val as usize + 8] + .try_into() + .unwrap(); + let f = f64::from_le_bytes(bytes); + fsp.set(&mut caller, Val::I32((fsp_val + FLOAT_SIZE) as i32)) + .unwrap(); + // Convert to i64 + let d = f as i64; + let lo = d as i32; + let hi = (d >> 32) as i32; + // Push lo then hi onto data stack + let sp = dsp.get(&mut caller).unwrap_i32() as 
u32; + let new_sp = sp - 8; // two cells + dsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap(); + let mem = memory.data_mut(&mut caller); + mem[new_sp as usize + 4..new_sp as usize + 8] + .copy_from_slice(&lo.to_le_bytes()); + mem[new_sp as usize..new_sp as usize + 4].copy_from_slice(&hi.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("F>D", false, func)?; + } + + // S>F ( n -- ) ( F: -- r ) convert single-cell integer to float + { + let memory = self.memory; + let dsp = self.dsp; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let b: [u8; 4] = mem[sp as usize..sp as usize + 4].try_into().unwrap(); + let n = i32::from_le_bytes(b); + dsp.set(&mut caller, Val::I32((sp + CELL_SIZE) as i32)) + .unwrap(); + let f = n as f64; + let fsp_val = fsp.get(&mut caller).unwrap_i32() as u32; + let new_fsp = fsp_val - FLOAT_SIZE; + fsp.set(&mut caller, Val::I32(new_fsp as i32)).unwrap(); + let mem = memory.data_mut(&mut caller); + mem[new_fsp as usize..new_fsp as usize + 8].copy_from_slice(&f.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("S>F", false, func)?; + } + + // F>S ( -- n ) ( F: r -- ) convert float to single-cell integer + { + let memory = self.memory; + let dsp = self.dsp; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let fsp_val = fsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let bytes: [u8; 8] = mem[fsp_val as usize..fsp_val as usize + 8] + .try_into() + .unwrap(); + let f = f64::from_le_bytes(bytes); + fsp.set(&mut caller, Val::I32((fsp_val + FLOAT_SIZE) as i32)) + .unwrap(); + let n = f as i32; + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let new_sp = sp - CELL_SIZE; + dsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap(); + 
let mem = memory.data_mut(&mut caller); + mem[new_sp as usize..new_sp as usize + 4].copy_from_slice(&n.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("F>S", false, func)?; + } + + Ok(()) + } + + /// Trigonometric functions. + fn register_float_trig(&mut self) -> anyhow::Result<()> { + self.register_float_unary("FSIN", f64::sin)?; + self.register_float_unary("FCOS", f64::cos)?; + self.register_float_unary("FTAN", f64::tan)?; + self.register_float_unary("FASIN", f64::asin)?; + self.register_float_unary("FACOS", f64::acos)?; + self.register_float_unary("FATAN", f64::atan)?; + self.register_float_binary("FATAN2", f64::atan2)?; + + // FSINCOS ( F: r1 -- r2 r3 ) r2=sin(r1) r3=cos(r1) + { + let memory = self.memory; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = fsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let bytes: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); + let val = f64::from_le_bytes(bytes); + let sin_val = val.sin(); + let cos_val = val.cos(); + // Replace TOS with sin, push cos on top + // Result: sin deeper, cos on top + let new_sp = sp - 8; // one more item + if new_sp < FLOAT_STACK_BASE { + return Err(wasmtime::Error::msg("float stack overflow")); + } + fsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap(); + let mem = memory.data_mut(&mut caller); + mem[new_sp as usize + 8..new_sp as usize + 16] + .copy_from_slice(&sin_val.to_le_bytes()); + mem[new_sp as usize..new_sp as usize + 8] + .copy_from_slice(&cos_val.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("FSINCOS", false, func)?; + } + + Ok(()) + } + + /// Exponential and logarithmic functions. 
+ fn register_float_exp_log(&mut self) -> anyhow::Result<()> { + self.register_float_unary("FEXP", f64::exp)?; + self.register_float_unary("FEXPM1", f64::exp_m1)?; + self.register_float_unary("FLN", f64::ln)?; + self.register_float_unary("FLNP1", f64::ln_1p)?; + self.register_float_unary("FLOG", f64::log10)?; + self.register_float_unary("FALOG", |x| 10.0_f64.powf(x))?; + Ok(()) + } + + /// Hyperbolic functions. + fn register_float_hyperbolic(&mut self) -> anyhow::Result<()> { + self.register_float_unary("FSINH", f64::sinh)?; + self.register_float_unary("FCOSH", f64::cosh)?; + self.register_float_unary("FTANH", f64::tanh)?; + self.register_float_unary("FASINH", f64::asinh)?; + self.register_float_unary("FACOSH", f64::acosh)?; + self.register_float_unary("FATANH", f64::atanh)?; + Ok(()) + } + + /// Float I/O words. + fn register_float_io(&mut self) -> anyhow::Result<()> { + // F. ( F: r -- ) print float followed by space + { + let memory = self.memory; + let fsp = self.fsp; + let output = Arc::clone(&self.output); + let precision = Arc::clone(&self.float_precision); + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = fsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let bytes: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); + let val = f64::from_le_bytes(bytes); + fsp.set(&mut caller, Val::I32((sp + 8) as i32)).unwrap(); + let prec = *precision.lock().unwrap(); + let s = format!("{val:.prec$} "); + output.lock().unwrap().push_str(&s); + Ok(()) + }, + ); + self.register_host_primitive("F.", false, func)?; + } + + // FE. 
( F: r -- ) print float in engineering notation + { + let memory = self.memory; + let fsp = self.fsp; + let output = Arc::clone(&self.output); + let precision = Arc::clone(&self.float_precision); + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = fsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let bytes: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); + let val = f64::from_le_bytes(bytes); + fsp.set(&mut caller, Val::I32((sp + 8) as i32)).unwrap(); + let prec = *precision.lock().unwrap(); + let s = format_engineering(val, prec); + output.lock().unwrap().push_str(&s); + Ok(()) + }, + ); + self.register_host_primitive("FE.", false, func)?; + } + + // FS. ( F: r -- ) print float in scientific notation + { + let memory = self.memory; + let fsp = self.fsp; + let output = Arc::clone(&self.output); + let precision = Arc::clone(&self.float_precision); + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = fsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let bytes: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap(); + let val = f64::from_le_bytes(bytes); + fsp.set(&mut caller, Val::I32((sp + 8) as i32)).unwrap(); + let prec = *precision.lock().unwrap(); + let s = format!("{val:.prec$E} "); + output.lock().unwrap().push_str(&s); + Ok(()) + }, + ); + self.register_host_primitive("FS.", false, func)?; + } + + // PRECISION ( -- u ) get current float output precision + { + let memory = self.memory; + let dsp = self.dsp; + let precision = Arc::clone(&self.float_precision); + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let prec = *precision.lock().unwrap() as i32; + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let new_sp = sp - CELL_SIZE; + dsp.set(&mut caller, Val::I32(new_sp as 
i32)).unwrap(); + let mem = memory.data_mut(&mut caller); + mem[new_sp as usize..new_sp as usize + 4].copy_from_slice(&prec.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("PRECISION", false, func)?; + } + + // SET-PRECISION ( u -- ) set float output precision + { + let memory = self.memory; + let dsp = self.dsp; + let precision = Arc::clone(&self.float_precision); + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let b: [u8; 4] = mem[sp as usize..sp as usize + 4].try_into().unwrap(); + let n = i32::from_le_bytes(b) as usize; + dsp.set(&mut caller, Val::I32((sp + CELL_SIZE) as i32)) + .unwrap(); + *precision.lock().unwrap() = n; + Ok(()) + }, + ); + self.register_host_primitive("SET-PRECISION", false, func)?; + } + + // REPRESENT ( c-addr u -- n flag1 flag2 ) ( F: r -- ) + { + let memory = self.memory; + let dsp = self.dsp; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + // Read all values from memory first + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let fsp_val = fsp.get(&mut caller).unwrap_i32() as u32; + let (u, c_addr, val) = { + let mem = memory.data(&caller); + let u_bytes: [u8; 4] = + mem[sp as usize..sp as usize + 4].try_into().unwrap(); + let addr_bytes: [u8; 4] = + mem[sp as usize + 4..sp as usize + 8].try_into().unwrap(); + let u = i32::from_le_bytes(u_bytes) as usize; + let c_addr = u32::from_le_bytes(addr_bytes) as usize; + let f_bytes: [u8; 8] = mem[fsp_val as usize..fsp_val as usize + 8] + .try_into() + .unwrap(); + (u, c_addr, f64::from_le_bytes(f_bytes)) + }; + + // Update stack pointers: pop 2 data cells, pop 1 float + dsp.set(&mut caller, Val::I32((sp + 8) as i32)).unwrap(); + fsp.set(&mut caller, Val::I32((fsp_val + FLOAT_SIZE) as i32)) + .unwrap(); + + let (digits, exp, is_negative, 
is_valid) = represent_float(val, u); + + // Store digits at c-addr, then push results + let digit_bytes = digits.as_bytes(); + let copy_len = digit_bytes.len().min(u); + // Push n, flag1 (sign), flag2 (valid) onto data stack + let cur_sp = dsp.get(&mut caller).unwrap_i32() as u32; + let new_sp = cur_sp - 12; + dsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap(); + let mem = memory.data_mut(&mut caller); + mem[c_addr..c_addr + copy_len].copy_from_slice(&digit_bytes[..copy_len]); + // Bottom: n (exponent) + mem[new_sp as usize + 8..new_sp as usize + 12] + .copy_from_slice(&exp.to_le_bytes()); + // Middle: flag1 (is_negative => true flag) + let sign_flag: i32 = if is_negative { -1 } else { 0 }; + mem[new_sp as usize + 4..new_sp as usize + 8] + .copy_from_slice(&sign_flag.to_le_bytes()); + // Top: flag2 (is_valid => true flag) + let valid_flag: i32 = if is_valid { -1 } else { 0 }; + mem[new_sp as usize..new_sp as usize + 4] + .copy_from_slice(&valid_flag.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("REPRESENT", false, func)?; + } + + // >FLOAT ( c-addr u -- flag ) ( F: -- r | ) parse string as float + { + let memory = self.memory; + let dsp = self.dsp; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let (u, c_addr, s_owned) = { + let mem = memory.data(&caller); + let u_bytes: [u8; 4] = + mem[sp as usize..sp as usize + 4].try_into().unwrap(); + let addr_bytes: [u8; 4] = + mem[sp as usize + 4..sp as usize + 8].try_into().unwrap(); + let u = i32::from_le_bytes(u_bytes) as usize; + let c_addr = u32::from_le_bytes(addr_bytes) as usize; + let s = std::str::from_utf8(&mem[c_addr..c_addr + u]) + .unwrap_or("") + .to_string(); + (u, c_addr, s) + }; + let _ = (u, c_addr); + // Pop u and c-addr (2 cells), will push back 1 cell (flag) + dsp.set(&mut caller, Val::I32((sp + 4) as i32)).unwrap(); + + let result = 
parse_forth_float(&s_owned); + + match result { + Some(f) => { + // Push float onto float stack + let fsp_val = fsp.get(&mut caller).unwrap_i32() as u32; + let new_fsp = fsp_val - FLOAT_SIZE; + fsp.set(&mut caller, Val::I32(new_fsp as i32)).unwrap(); + let flag_sp = dsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data_mut(&mut caller); + mem[new_fsp as usize..new_fsp as usize + 8] + .copy_from_slice(&f.to_le_bytes()); + mem[flag_sp as usize..flag_sp as usize + 4] + .copy_from_slice(&(-1_i32).to_le_bytes()); + } + None => { + let flag_sp = dsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data_mut(&mut caller); + mem[flag_sp as usize..flag_sp as usize + 4] + .copy_from_slice(&0_i32.to_le_bytes()); + } + } + Ok(()) + }, + ); + self.register_host_primitive(">FLOAT", false, func)?; + } + + Ok(()) + } + + /// Miscellaneous float words: FVARIABLE, FCONSTANT, FVALUE, >FLOAT parsing. + fn register_float_misc(&mut self) -> anyhow::Result<()> { + // FVARIABLE, FCONSTANT, FVALUE are handled in interpret_token_immediate + // as special tokens (like VARIABLE/CONSTANT/VALUE). + + // SF! 
( sf-addr -- ) ( F: r -- ) store as single-precision float (f32) + { + let memory = self.memory; + let dsp = self.dsp; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let fsp_val = fsp.get(&mut caller).unwrap_i32() as u32; + let (addr, f32_bytes) = { + let mem = memory.data(&caller); + let addr_bytes: [u8; 4] = + mem[sp as usize..sp as usize + 4].try_into().unwrap(); + let addr = u32::from_le_bytes(addr_bytes) as usize; + let f_bytes: [u8; 8] = mem[fsp_val as usize..fsp_val as usize + 8] + .try_into() + .unwrap(); + let val = f64::from_le_bytes(f_bytes); + (addr, (val as f32).to_le_bytes()) + }; + dsp.set(&mut caller, Val::I32((sp + CELL_SIZE) as i32)) + .unwrap(); + fsp.set(&mut caller, Val::I32((fsp_val + FLOAT_SIZE) as i32)) + .unwrap(); + let mem = memory.data_mut(&mut caller); + mem[addr..addr + 4].copy_from_slice(&f32_bytes); + Ok(()) + }, + ); + self.register_host_primitive("SF!", false, func)?; + } + + // SF@ ( sf-addr -- ) ( F: -- r ) fetch single-precision float (f32) + { + let memory = self.memory; + let dsp = self.dsp; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let fsp_val = fsp.get(&mut caller).unwrap_i32() as u32; + let val = { + let mem = memory.data(&caller); + let addr_bytes: [u8; 4] = + mem[sp as usize..sp as usize + 4].try_into().unwrap(); + let addr = u32::from_le_bytes(addr_bytes) as usize; + let f32_bytes: [u8; 4] = mem[addr..addr + 4].try_into().unwrap(); + f32::from_le_bytes(f32_bytes) as f64 + }; + dsp.set(&mut caller, Val::I32((sp + CELL_SIZE) as i32)) + .unwrap(); + let new_fsp = fsp_val - FLOAT_SIZE; + fsp.set(&mut caller, Val::I32(new_fsp as i32)).unwrap(); + let mem = memory.data_mut(&mut caller); + mem[new_fsp as usize..new_fsp as usize + 
8].copy_from_slice(&val.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("SF@", false, func)?; + } + + // DF! ( df-addr -- ) ( F: r -- ) same as F! (our floats are already f64) + { + let memory = self.memory; + let dsp = self.dsp; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let fsp_val = fsp.get(&mut caller).unwrap_i32() as u32; + let (addr, float_bytes) = { + let mem = memory.data(&caller); + let addr_bytes: [u8; 4] = + mem[sp as usize..sp as usize + 4].try_into().unwrap(); + let addr = u32::from_le_bytes(addr_bytes) as usize; + let float_bytes: [u8; 8] = mem[fsp_val as usize..fsp_val as usize + 8] + .try_into() + .unwrap(); + (addr, float_bytes) + }; + dsp.set(&mut caller, Val::I32((sp + CELL_SIZE) as i32)) + .unwrap(); + fsp.set(&mut caller, Val::I32((fsp_val + FLOAT_SIZE) as i32)) + .unwrap(); + let mem = memory.data_mut(&mut caller); + mem[addr..addr + 8].copy_from_slice(&float_bytes); + Ok(()) + }, + ); + self.register_host_primitive("DF!", false, func)?; + } + + // DF@ ( df-addr -- ) ( F: -- r ) same as F@ (our floats are already f64) + { + let memory = self.memory; + let dsp = self.dsp; + let fsp = self.fsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let fsp_val = fsp.get(&mut caller).unwrap_i32() as u32; + let val = { + let mem = memory.data(&caller); + let addr_bytes: [u8; 4] = + mem[sp as usize..sp as usize + 4].try_into().unwrap(); + let addr = u32::from_le_bytes(addr_bytes) as usize; + let float_bytes: [u8; 8] = mem[addr..addr + 8].try_into().unwrap(); + f64::from_le_bytes(float_bytes) + }; + dsp.set(&mut caller, Val::I32((sp + CELL_SIZE) as i32)) + .unwrap(); + let new_fsp = fsp_val - FLOAT_SIZE; + fsp.set(&mut caller, Val::I32(new_fsp as i32)).unwrap(); + let mem = 
memory.data_mut(&mut caller); + mem[new_fsp as usize..new_fsp as usize + 8].copy_from_slice(&val.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("DF@", false, func)?; + } + + // SFALIGNED, DFALIGNED (alignment words for single/double floats) + { + let memory = self.memory; + let dsp = self.dsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let b: [u8; 4] = mem[sp as usize..sp as usize + 4].try_into().unwrap(); + let addr = u32::from_le_bytes(b); + let aligned = (addr + 3) & !3; // 4-byte alignment for single float + let mem = memory.data_mut(&mut caller); + mem[sp as usize..sp as usize + 4].copy_from_slice(&aligned.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("SFALIGNED", false, func)?; + } + + // DFALIGNED is the same as FALIGNED (8-byte alignment) + { + let memory = self.memory; + let dsp = self.dsp; + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let mem = memory.data(&caller); + let b: [u8; 4] = mem[sp as usize..sp as usize + 4].try_into().unwrap(); + let addr = u32::from_le_bytes(b); + let aligned = (addr + 7) & !7; + let mem = memory.data_mut(&mut caller); + mem[sp as usize..sp as usize + 4].copy_from_slice(&aligned.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("DFALIGNED", false, func)?; + } + + // SFALIGN, DFALIGN (align HERE) + // Not commonly needed but let's register stubs + // SFALIGN aligns to 4, DFALIGN aligns to 8 + { + let memory = self.memory; + let here_cell = self.here_cell.clone(); + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let here_val = if let Some(ref cell) = here_cell { + *cell.lock().unwrap() + } else { + let mem = memory.data(&caller); + let b: [u8; 
4] = mem[crate::memory::SYSVAR_HERE as usize + ..crate::memory::SYSVAR_HERE as usize + 4] + .try_into() + .unwrap(); + u32::from_le_bytes(b) + }; + let aligned = (here_val + 3) & !3; + if let Some(ref cell) = here_cell { + *cell.lock().unwrap() = aligned; + } + let mem = memory.data_mut(&mut caller); + mem[crate::memory::SYSVAR_HERE as usize + ..crate::memory::SYSVAR_HERE as usize + 4] + .copy_from_slice(&aligned.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("SFALIGN", false, func)?; + } + + { + let memory = self.memory; + let here_cell = self.here_cell.clone(); + let func = Func::new( + &mut self.store, + FuncType::new(&self.engine, [], []), + move |mut caller, _, _| { + let here_val = if let Some(ref cell) = here_cell { + *cell.lock().unwrap() + } else { + let mem = memory.data(&caller); + let b: [u8; 4] = mem[crate::memory::SYSVAR_HERE as usize + ..crate::memory::SYSVAR_HERE as usize + 4] + .try_into() + .unwrap(); + u32::from_le_bytes(b) + }; + let aligned = (here_val + 7) & !7; + if let Some(ref cell) = here_cell { + *cell.lock().unwrap() = aligned; + } + let mem = memory.data_mut(&mut caller); + mem[crate::memory::SYSVAR_HERE as usize + ..crate::memory::SYSVAR_HERE as usize + 4] + .copy_from_slice(&aligned.to_le_bytes()); + Ok(()) + }, + ); + self.register_host_primitive("DFALIGN", false, func)?; + } + + Ok(()) + } + + /// Allocate a function table slot for an anonymous host function. + /// Returns a `WordId` that can be used in `IrOp::Call`. + /// Does NOT touch the dictionary, so it's safe during colon compilation. 
+    /// Allocate a function table slot for an anonymous host function.
+    ///
+    /// Returns a `WordId` usable in `IrOp::Call`. Does NOT create a dictionary
+    /// entry, so it is safe to call in the middle of a colon compilation.
+    fn install_anon_func(&mut self, func: Func) -> anyhow::Result<WordId> {
+        let idx = self.next_table_index;
+        self.next_table_index += 1;
+        // Also advance the dictionary's fn index counter to stay in sync:
+        // table slots and dictionary fn indices share one numbering space,
+        // so an anonymous slot must still consume a dictionary index.
+        self.dictionary.reserve_fn_index();
+        self.ensure_table_size(idx)?;
+        self.table
+            .set(&mut self.store, idx as u64, Ref::Func(Some(func)))?;
+        Ok(WordId(idx))
+    }
+
+    /// Compile a float literal for use inside a colon definition.
+    /// Creates a tiny host function that pushes the given f64 onto the float
+    /// stack, installs it anonymously, and emits an `IrOp::Call` to it.
+    fn compile_float_literal(&mut self, val: f64) -> anyhow::Result<()> {
+        let memory = self.memory;
+        let fsp = self.fsp;
+        let func = Func::new(
+            &mut self.store,
+            FuncType::new(&self.engine, [], []),
+            move |mut caller, _, _| {
+                // Float stack grows downward: push = decrement fsp, then store.
+                let sp = fsp.get(&mut caller).unwrap_i32() as u32;
+                let new_sp = sp - FLOAT_SIZE;
+                if new_sp < FLOAT_STACK_BASE {
+                    return Err(wasmtime::Error::msg("float stack overflow"));
+                }
+                fsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap();
+                let mem = memory.data_mut(&mut caller);
+                mem[new_sp as usize..new_sp as usize + 8].copy_from_slice(&val.to_le_bytes());
+                Ok(())
+            },
+        );
+        let word_id = self.install_anon_func(func)?;
+        self.push_ir(IrOp::Call(word_id));
+        Ok(())
+    }
+
+    /// Create a host function that pops from float stack and stores at the given address.
+    /// Used for `TO <fvalue-name>` in compile mode.
+    fn make_fvalue_store(&mut self, pfa: u32) -> anyhow::Result<WordId> {
+        let memory = self.memory;
+        let fsp = self.fsp;
+        let func = Func::new(
+            &mut self.store,
+            FuncType::new(&self.engine, [], []),
+            move |mut caller, _, _| {
+                // Pop TOS from the float stack and write it to the FVALUE's PFA.
+                // NOTE(review): no underflow check against the float stack top
+                // here — popping with an empty float stack reads stale memory;
+                // confirm callers guard via FDEPTH or accept the GIGO behavior.
+                let sp = fsp.get(&mut caller).unwrap_i32() as u32;
+                let mem = memory.data(&caller);
+                let bytes: [u8; 8] = mem[sp as usize..sp as usize + 8].try_into().unwrap();
+                fsp.set(&mut caller, Val::I32((sp + FLOAT_SIZE) as i32))
+                    .unwrap();
+                let mem = memory.data_mut(&mut caller);
+                mem[pfa as usize..pfa as usize + 8].copy_from_slice(&bytes);
+                Ok(())
+            },
+        );
+        self.install_anon_func(func)
+    }
+
+    /// FVARIABLE -- allocate 8 bytes, word pushes address
+    ///
+    /// Parses a name from the input, allocates an 8-byte-aligned cell in user
+    /// memory (initialized to 0.0), and compiles a word that pushes that
+    /// address onto the DATA stack (not the float stack), per the standard.
+    fn define_fvariable(&mut self) -> anyhow::Result<()> {
+        let name = self
+            .next_token()
+            .ok_or_else(|| anyhow::anyhow!("FVARIABLE: expected name"))?;
+
+        let word_id = self
+            .dictionary
+            .create(&name, false)
+            .map_err(|e| anyhow::anyhow!("{e}"))?;
+
+        // Allocate 8 bytes aligned (round HERE up to an 8-byte boundary)
+        self.refresh_user_here();
+        let addr = (self.user_here + 7) & !7;
+        self.user_here = addr + FLOAT_SIZE;
+
+        // Initialize to zero
+        let data = self.memory.data_mut(&mut self.store);
+        data[addr as usize..addr as usize + 8].copy_from_slice(&0.0_f64.to_le_bytes());
+
+        // Compile a word that pushes the address onto the DATA stack.
+        // The address is baked in as an i32 literal at codegen time.
+        let ir_body = vec![IrOp::PushI32(addr as i32)];
+        let config = CodegenConfig {
+            base_fn_index: word_id.0,
+            table_size: self.table_size(),
+        };
+        let compiled = compile_word(&name, &ir_body, &config)
+            .map_err(|e| anyhow::anyhow!("codegen error for FVARIABLE {name}: {e}"))?;
+
+        self.instantiate_and_install(&compiled, word_id)?;
+        self.dictionary.reveal();
+        self.sync_word_lookup(&name, word_id, false);
+        self.next_table_index = self.next_table_index.max(word_id.0 + 1);
+        self.sync_here_cell();
+
+        Ok(())
+    }
+
+    /// FCONSTANT ( F: r -- ) -- create a word that pushes r onto float stack
+    ///
+    /// Pops r at definition time and closes over it in a host function, so the
+    /// constant needs no backing memory cell.
+    fn define_fconstant(&mut self) -> anyhow::Result<()> {
+        let val = self.fpop()?;
+        let name = self
+            .next_token()
+            .ok_or_else(|| anyhow::anyhow!("FCONSTANT: expected name"))?;
+
+        let word_id = self
+            .dictionary
+            .create(&name, false)
+            .map_err(|e| anyhow::anyhow!("{e}"))?;
+
+        // Create a host function that pushes the constant onto float stack
+        let memory = self.memory;
+        let fsp = self.fsp;
+        let func = Func::new(
+            &mut self.store,
+            FuncType::new(&self.engine, [], []),
+            move |mut caller, _, _| {
+                let sp = fsp.get(&mut caller).unwrap_i32() as u32;
+                let new_sp = sp - FLOAT_SIZE;
+                if new_sp < FLOAT_STACK_BASE {
+                    return Err(wasmtime::Error::msg("float stack overflow"));
+                }
+                fsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap();
+                let mem = memory.data_mut(&mut caller);
+                mem[new_sp as usize..new_sp as usize + 8].copy_from_slice(&val.to_le_bytes());
+                Ok(())
+            },
+        );
+
+        self.ensure_table_size(word_id.0)?;
+        self.table
+            .set(&mut self.store, word_id.0 as u64, Ref::Func(Some(func)))?;
+        self.dictionary.reveal();
+        self.sync_word_lookup(&name, word_id, false);
+        self.next_table_index = self.next_table_index.max(word_id.0 + 1);
+
+        Ok(())
+    }
+
+    /// FVALUE ( F: r -- ) -- create a word that fetches r from storage
+    ///
+    /// Unlike FCONSTANT, the value lives in an allocated memory cell so that
+    /// `TO <name>` can rewrite it later; the xt-to-PFA mapping registered at
+    /// the end is what TO uses to find the cell.
+    fn define_fvalue(&mut self) -> anyhow::Result<()> {
+        let val = self.fpop()?;
+        let name = self
+            .next_token()
+            .ok_or_else(|| anyhow::anyhow!("FVALUE: expected name"))?;
+
+        let word_id = self
+            .dictionary
+            .create(&name, false)
+            .map_err(|e| anyhow::anyhow!("{e}"))?;
+
+        // Allocate 8 bytes aligned for the value's storage
+        self.refresh_user_here();
+        let val_addr = (self.user_here + 7) & !7;
+        self.user_here = val_addr + FLOAT_SIZE;
+
+        // Initialize the storage with the given value
+        let data = self.memory.data_mut(&mut self.store);
+        data[val_addr as usize..val_addr as usize + 8].copy_from_slice(&val.to_le_bytes());
+
+        // Create a host function that fetches from storage and pushes onto float stack
+        let memory = self.memory;
+        let fsp = self.fsp;
+        let func = Func::new(
+            &mut self.store,
+            FuncType::new(&self.engine, [], []),
+            move |mut caller, _, _| {
+                // Read the cell first (immutable borrow), then push.
+                let mem = memory.data(&caller);
+                let bytes: [u8; 8] = mem[val_addr as usize..val_addr as usize + 8]
+                    .try_into()
+                    .unwrap();
+                let sp = fsp.get(&mut caller).unwrap_i32() as u32;
+                let new_sp = sp - FLOAT_SIZE;
+                if new_sp < FLOAT_STACK_BASE {
+                    return Err(wasmtime::Error::msg("float stack overflow"));
+                }
+                fsp.set(&mut caller, Val::I32(new_sp as i32)).unwrap();
+                let mem = memory.data_mut(&mut caller);
+                mem[new_sp as usize..new_sp as usize + 8].copy_from_slice(&bytes);
+                Ok(())
+            },
+        );
+
+        self.ensure_table_size(word_id.0)?;
+        self.table
+            .set(&mut self.store, word_id.0 as u64, Ref::Func(Some(func)))?;
+        self.dictionary.reveal();
+        self.sync_word_lookup(&name, word_id, false);
+        self.next_table_index = self.next_table_index.max(word_id.0 + 1);
+        // Map xt -> PFA for TO
+        self.word_pfa_map.insert(word_id.0, val_addr);
+        self.sync_pfa_map(word_id.0, val_addr);
+        self.fvalue_words.insert(word_id.0);
+        self.sync_here_cell();
+
+        Ok(())
+    }
+}
+
+/// Format a float in engineering notation (exponent is a multiple of 3),
+/// followed by a trailing space, for FE. output.
+///
+/// NOTE(review): when rounding carries over (e.g. 999.9996 at low precision)
+/// the mantissa can print as "1000.000" instead of renormalizing to the next
+/// exponent — confirm whether that is acceptable for FE. output. Likewise
+/// `log10().floor()` may be off by one at exact powers of ten due to f64
+/// rounding; verify against the float test suite.
+fn format_engineering(val: f64, prec: usize) -> String {
+    if val == 0.0 {
+        return format!("0.{:0>width$}E0 ", "", width = prec);
+    }
+    let abs_val = val.abs();
+    let exp = abs_val.log10().floor() as i32;
+    // rem_euclid keeps the result in 0..3 even for negative exponents,
+    // so eng_exp is the largest multiple of 3 <= exp.
+    let eng_exp = exp - exp.rem_euclid(3);
+    let mantissa = val / 10.0_f64.powi(eng_exp);
+    format!("{mantissa:.prec$}E{eng_exp} ")
+}
+
+/// Parse a Forth float format string into f64.
+/// Parse a string in Forth float syntax into an f64.
+///
+/// Accepted forms (case-insensitive, 'D' treated as 'E'): "1E", "1.5E2",
+/// "-3.14E0", "1E+5", "1.5" (point, no exponent). An empty/blank string
+/// yields 0.0 per the Forth-2012 >FLOAT special case. Returns `None` for
+/// anything else (letters other than E/D, multiple E's, embedded spaces,
+/// digitless mantissas).
+///
+/// NOTE(review): plain digit strings without '.' or 'E' (e.g. "123") are
+/// rejected here; Forth-2012 >FLOAT arguably accepts them as 123E0. If this
+/// same parser also gates float-literal recognition in the outer interpreter,
+/// that rejection is what keeps "123" an integer — confirm before changing.
+fn parse_forth_float(s: &str) -> Option<f64> {
+    let s = s.trim();
+    // Empty string or all spaces = 0.0 (Forth 2012 >FLOAT special case)
+    if s.is_empty() {
+        return Some(0.0);
+    }
+    let upper = s.to_ascii_uppercase();
+
+    // Reject anything with letters other than E or D
+    // (also screens out "inf"/"nan", which f64::parse would accept)
+    for c in upper.chars() {
+        if c.is_ascii_alphabetic() && c != 'E' && c != 'D' {
+            return None;
+        }
+    }
+
+    // Replace 'D' with 'E' for Rust parsing ('D' is the double-precision
+    // exponent marker in traditional Forth sources)
+    let normalized = upper.replace('D', "E");
+
+    // Check that there's at least one digit somewhere
+    let has_digit = normalized.chars().any(|c| c.is_ascii_digit());
+    if !has_digit {
+        return None;
+    }
+
+    // Must contain 'E' or a '.' to be a valid float
+    if !normalized.contains('E') {
+        if normalized.contains('.') {
+            return normalized.parse::<f64>().ok();
+        }
+        // Just digits with no E and no dot -- not a valid float for >FLOAT
+        return None;
+    }
+
+    // Must not have multiple E's
+    if normalized.matches('E').count() > 1 {
+        return None;
+    }
+
+    // Must not contain spaces within the number (leading/trailing were trimmed)
+    if normalized.contains(' ') {
+        return None;
+    }
+
+    // Split on E, verify the mantissa part has digits
+    let parts: Vec<&str> = normalized.splitn(2, 'E').collect();
+    let mantissa = parts[0];
+    // Strip sign from mantissa
+    let mantissa_stripped = mantissa.trim_start_matches(['+', '-']);
+    // Must have at least one digit in mantissa (rejects "E5", "-.E2", ...)
+    if !mantissa_stripped.chars().any(|c| c.is_ascii_digit()) {
+        return None;
+    }
+
+    // Trailing E without exponent: "1E" means "1E0" (likewise "1E+"/"1E-")
+    let s = if normalized.ends_with('E') || normalized.ends_with("E+") || normalized.ends_with("E-")
+    {
+        format!("{normalized}0")
+    } else {
+        normalized
+    };
+
+    s.parse::<f64>().ok()
+}
+
+/// REPRESENT helper: convert f64 to digit string.
+/// REPRESENT helper: convert an f64 into its decimal-digit representation.
+///
+/// Returns `(digits, exp, is_negative, is_valid)` where `digits` is a string
+/// of exactly `buf_len` decimal digits and the value is interpreted as
+/// 0.<digits> * 10^exp (the Forth-2012 REPRESENT convention). `is_valid` is
+/// false for NaN, infinity, or a zero-length buffer; the digit buffer is
+/// zero-filled in those cases.
+///
+/// NOTE(review): the exponent comes from `log10().floor()`, which can be off
+/// by one at exact powers of ten due to f64 rounding — confirm against the
+/// compliance tests. The `format!("{:.0}", scaled.round())` double-rounds,
+/// which is harmless but redundant.
+fn represent_float(val: f64, buf_len: usize) -> (String, i32, bool, bool) {
+    if buf_len == 0 {
+        return (String::new(), 0, val.is_sign_negative(), false);
+    }
+    if val.is_nan() {
+        return ("0".repeat(buf_len), 0, false, false);
+    }
+    if val.is_infinite() {
+        return ("0".repeat(buf_len), 0, val < 0.0, false);
+    }
+    let is_negative = val.is_sign_negative();
+    let abs_val = val.abs();
+    if abs_val == 0.0 {
+        // Preserves the sign of -0.0 via is_negative
+        return ("0".repeat(buf_len), 0, is_negative, true);
+    }
+    // exp such that abs_val ~= 0.d1d2... * 10^exp
+    let exp = abs_val.log10().floor() as i32 + 1;
+    // Scale so the first buf_len significant digits land left of the point
+    let scaled = abs_val / 10.0_f64.powi(exp - buf_len as i32);
+    let digits = format!("{:.0}", scaled.round());
+    // Handle carry (e.g. 9.95 with buf_len=2 rounds to "100"): keep the
+    // leading buf_len digits and bump the exponent by one.
+    if digits.len() > buf_len {
+        let truncated = &digits[..buf_len];
+        return (truncated.to_string(), exp + 1, is_negative, true);
+    }
+    // Left-pad with zeros for values whose leading digits round to fewer
+    // characters than buf_len
+    let padded = format!("{digits:0>buf_len$}");
+    (padded, exp, is_negative, true)
}

/// Format a signed 64-bit integer in the given base, followed by a space.
@@ -8137,4 +9826,248 @@ mod tests { let stack = vm.data_stack(); assert_eq!(stack[0], dup_xt); } + + // -- Floating-Point word set tests -- + + fn eval_float_stack(input: &str) -> Vec { + let mut vm = ForthVM::new().unwrap(); + vm.evaluate(input).unwrap(); + vm.float_stack() + } + + #[test] + fn test_float_literal_interpret() { + let fs = eval_float_stack("1E"); + assert_eq!(fs.len(), 1); + assert!((fs[0] - 1.0).abs() < 1e-15); + } + + #[test] + fn test_float_literal_with_exponent() { + let fs = eval_float_stack("1.5E2"); + assert!((fs[0] - 150.0).abs() < 1e-10); + } + + #[test] + fn test_float_add() { + assert_eq!(eval_output("1E 2E F+ F."), "3.000000 "); + } + + #[test] + fn test_float_sub() { + assert_eq!(eval_output("5E 3E F- F."), "2.000000 "); + } + + #[test] + fn test_float_mul() { + assert_eq!(eval_output("3E 4E F* F."), "12.000000 "); + } + + #[test] + fn test_float_div() { + assert_eq!(eval_output("10E 4E F/ F."), "2.500000 "); + } + + #[test] + fn test_float_negate() { + assert_eq!(eval_output("3E FNEGATE F."), "-3.000000 "); + } + + #[test] + fn test_float_abs() { + assert_eq!(eval_output("-5E FABS F."), "5.000000 "); + } + + #[test] + fn test_fdepth() { + assert_eq!(eval_stack("FDEPTH"), vec![0]); + assert_eq!(eval_stack("1E FDEPTH"), vec![1]); + assert_eq!(eval_stack("1E 2E FDEPTH"), vec![2]); + } + + #[test] + fn test_fdrop() { + assert_eq!(eval_stack("1E 2E FDROP FDEPTH"), vec![1]); + } + + #[test] + fn test_fdup() { + assert_eq!(eval_stack("3E FDUP FDEPTH"), vec![2]); + } + + #[test] + fn test_fswap() { + assert_eq!(eval_output("1E 2E FSWAP F. F."), "1.000000 2.000000 "); + } + + #[test] + fn test_fover() { + assert_eq!( + eval_output("1E 2E FOVER F. F. F."), + "1.000000 2.000000 1.000000 " + ); + } + + #[test] + fn test_frot() { + assert_eq!( + eval_output("1E 2E 3E FROT F. F. 
F."), + "1.000000 3.000000 2.000000 " + ); + } + + #[test] + fn test_f0_eq() { + assert_eq!(eval_stack("0E F0="), vec![-1]); + assert_eq!(eval_stack("1E F0="), vec![0]); + } + + #[test] + fn test_f0_lt() { + assert_eq!(eval_stack("-1E F0<"), vec![-1]); + assert_eq!(eval_stack("0E F0<"), vec![0]); + assert_eq!(eval_stack("1E F0<"), vec![0]); + } + + #[test] + fn test_f_eq() { + assert_eq!(eval_stack("1E 1E F="), vec![-1]); + assert_eq!(eval_stack("1E 2E F="), vec![0]); + } + + #[test] + fn test_f_lt() { + assert_eq!(eval_stack("1E 2E F<"), vec![-1]); + assert_eq!(eval_stack("2E 1E F<"), vec![0]); + } + + #[test] + fn test_s_to_f_f_to_s() { + assert_eq!(eval_stack("42 S>F F>S"), vec![42]); + assert_eq!(eval_stack("-7 S>F F>S"), vec![-7]); + } + + #[test] + fn test_d_to_f_f_to_d() { + assert_eq!(eval_stack("1. D>F F>D"), vec![0, 1]); // 1. = lo=1, hi=0 + } + + #[test] + fn test_float_literal_compile_mode() { + assert_eq!(eval_stack(": TEST 3.14E0 F>S ; TEST"), vec![3]); + } + + #[test] + fn test_float_compile_fplus() { + assert_eq!(eval_output(": FTEST 1E 2E F+ ; FTEST F."), "3.000000 "); + } + + #[test] + fn test_fvariable() { + assert_eq!(eval_output("FVARIABLE X 3.14E0 X F! X F@ F."), "3.140000 "); + } + + #[test] + fn test_fconstant() { + assert_eq!(eval_output("3.14E0 FCONSTANT PI PI F."), "3.140000 "); + } + + #[test] + fn test_fvalue_and_to() { + assert_eq!( + eval_output("1E FVALUE V V F. 2E TO V V F."), + "1.000000 2.000000 " + ); + } + + #[test] + fn test_fliteral() { + assert_eq!(eval_output(": FT [ -2E ] FLITERAL F. 
; FT"), "-2.000000 "); + } + + #[test] + fn test_fsqrt() { + assert_eq!(eval_output("4E FSQRT F."), "2.000000 "); + } + + #[test] + fn test_fsin_cos() { + // sin(0) = 0, cos(0) = 1 + assert_eq!(eval_stack("0E FSIN F>S"), vec![0]); + assert_eq!(eval_stack("0E FCOS F>S"), vec![1]); + } + + #[test] + fn test_fexp_fln() { + assert_eq!(eval_stack("0E FEXP F>S"), vec![1]); // e^0 = 1 + assert_eq!(eval_stack("1E FLN F>S"), vec![0]); // ln(1) = 0 + } + + #[test] + fn test_floor_fround() { + assert_eq!(eval_output("1.7E FLOOR F."), "1.000000 "); + assert_eq!(eval_output("-1.3E FLOOR F."), "-2.000000 "); + } + + #[test] + fn test_fpower() { + assert_eq!(eval_output("2E 3E F** F."), "8.000000 "); + } + + #[test] + fn test_fmax_fmin() { + assert_eq!(eval_output("3E 5E FMAX F."), "5.000000 "); + assert_eq!(eval_output("3E 5E FMIN F."), "3.000000 "); + } + + #[test] + fn test_precision() { + assert_eq!(eval_output("3 SET-PRECISION 1E F."), "1.000 "); + } + + #[test] + fn test_f_store_fetch() { + assert_eq!( + eval_output("VARIABLE BUF 2 CELLS ALLOT 42E BUF F! BUF F@ F."), + "42.000000 " + ); + } + + #[test] + fn test_float_plus_floats() { + assert_eq!(eval_stack("0 FLOAT+"), vec![8]); + assert_eq!(eval_stack("3 FLOATS"), vec![24]); + } + + #[test] + fn test_represent() { + // 1E with 5 digits should give "10000" and exponent 1 + let mut vm = ForthVM::new().unwrap(); + vm.evaluate("CREATE FBUF 20 ALLOT").unwrap(); + vm.evaluate("1E FBUF 5 REPRESENT").unwrap(); + let stack = vm.data_stack(); + // Stack should be: exponent=1, sign=0 (not negative), valid=-1 (true) + // Top first: valid, sign, exponent + assert_eq!(stack[0], -1); // valid = true + assert_eq!(stack[1], 0); // not negative + assert_eq!(stack[2], 1); // exponent + } + + #[test] + fn test_to_float() { + // >FLOAT with "1E" should return true and push 1.0 + assert_eq!(eval_stack(r#"S" 1E" >FLOAT"#), vec![-1]); + // >FLOAT with "." should return false + assert_eq!(eval_stack(r#"S" ." 
>FLOAT"#), vec![0]); + } + + #[test] + fn test_f_tilde() { + // Exact comparison: F~ with 0E + assert_eq!(eval_stack("1E 1E 0E F~"), vec![-1]); + assert_eq!(eval_stack("1E 2E 0E F~"), vec![0]); + // Absolute comparison + assert_eq!(eval_stack("1E 1.5E 1E F~"), vec![-1]); // |1-1.5| < 1 + assert_eq!(eval_stack("1E 2.5E 1E F~"), vec![0]); // |1-2.5| = 1.5 >= 1 + } } diff --git a/docs/APPLICATIONS.md b/docs/APPLICATIONS.md new file mode 100644 index 0000000..7a6dca6 --- /dev/null +++ b/docs/APPLICATIONS.md @@ -0,0 +1,890 @@ +# The Unreasonable Effectiveness of Stack Machines + +_How Forth — and WAFER — can serve as infrastructure for data analytics, +databases, AI inference, AI code generation, and AI agent control._ + +--- + +Forth is 55 years old. It has no type system, no garbage collector, no package +manager, no syntax to speak of. By most conventional measures, it shouldn't +still be relevant. + +But it keeps showing up at the edges — in firmware, in space probes, in +real-time systems, in places where correctness and determinism matter more than +developer ergonomics. That's worth paying attention to. + +The properties that make Forth unusual — concatenative composition, zero-cost +abstraction through word definition, a stack-based execution model that maps +directly to hardware — happen to line up surprisingly well with what five of +the most active areas in modern computing are independently reaching for: + +1. **Data analytics** wants composable, streaming pipelines. +2. **Database engines** want stack-based virtual machines for query execution. +3. **AI inference** wants tiny, deterministic, embeddable runtimes. +4. **AI code generation** wants the smallest possible target language. +5. **AI agent systems** want plans that are also executable programs. + +Forth won't single-handedly solve any of these. 
But it offers a useful lens +for understanding what each of them actually needs — and WAFER, a Forth that +compiles to WebAssembly, is in a good position to explore that space. + +WAFER (WebAssembly Forth Engine in Rust) JIT-compiles each Forth word to its +own WASM module, linked through shared linear memory, globals, and a function +table. It runs anywhere WASM runs: browsers, edge devices, servers, embedded +systems. It has 160+ words, 100% Forth 2012 compliance on 10 word sets, and +fits in ~50 KB. It has exception handling (`CATCH`/`THROW`), metaprogramming +(`DOES>`), dynamic compilation (`EVALUATE`), and an optimization pipeline +designed for stack-to-local promotion that can achieve 7x speedups. + +This document explores what becomes possible when you take these properties +seriously. + +--- + +## 1. Data Analytics: Pipelines Without Plumbing + +### The Problem with Pipelines + +Every data analytics framework reinvents the same idea: take data, push it +through a sequence of transformations, collect the result. Pandas chains +methods. Spark builds DAGs. dplyr pipes with `%>%`. Unix pipes bytes through +`|`. They all converge on the same shape: **linear composition of operations +on an implicit data flow**. + +This is exactly what Forth does. It has done it since 1970. The data stack +_is_ the pipeline. Each word _is_ a transformation. Composition is +juxtaposition — you don't pipe, you don't chain, you don't bind. You just +write the words next to each other. + +```forth +\ Pandas: df['amount'].where(df['amount'] > 0).mean() +\ Forth: +: POSITIVE? ( n -- n flag ) DUP 0> ; +: FILTER-POSITIVE ( addr n -- addr' n' ) + 0 >R 0 >R \ count and sum accumulators on return stack + 0 DO + DUP I CELLS + @ + POSITIVE? IF R> + >R R> 1+ >R THEN + LOOP DROP + R> R> \ ( sum count ) +; +: MEAN ( sum count -- avg ) / ; + +data 100 FILTER-POSITIVE MEAN . +``` + +This goes a bit deeper than syntactic sugar. The absence of intermediate +variables is a structural property. 
In a Pandas chain, every `.method()` +returns a new DataFrame object that must be allocated, tracked, and eventually +collected. In Forth, the data flows through the stack with zero allocation. +The pipeline _is_ the execution. + +### Streaming and Incremental Computation + +The stack model is inherently streaming. A word consumes its inputs and +produces its outputs in the same motion. There is no "collect all data first, +then process" step unless you explicitly build one. This makes Forth natural +for: + +- **Event stream processing**: each event lands on the stack, a word + processes it, the result is consumed by the next word. +- **Incremental aggregation**: running sums, counts, and statistics + maintained on the return stack across invocations. +- **Windowed computation**: a circular buffer in linear memory with + stack-based access patterns. + +```forth +\ Running average over a stream of values +VARIABLE running-sum +VARIABLE running-count + +: UPDATE-AVG ( new-value -- running-avg ) + running-sum @ + DUP running-sum ! + running-count @ 1+ DUP running-count ! + / +; + +\ Each incoming value: +42 UPDATE-AVG . \ prints running average after adding 42 +17 UPDATE-AVG . \ prints updated average after adding 17 +``` + +### Client-Side Analytics via WASM + +WAFER compiles to WebAssembly. This means analytics can run _in the browser_ +with no server round-trips. A user uploads a CSV, WAFER parses and processes +it entirely client-side, and the results render immediately. No data leaves +the machine. No API calls. No latency. + +This isn't just a nice demo. For privacy-sensitive analytics (healthcare, +finance, GDPR-regulated data), client-side processing can be a compliance +requirement, not just a nice-to-have. WAFER's deterministic execution (no GC +pauses, no background threads, fixed memory layout) makes it predictable +enough for real-time dashboards. 
+ +### Domain-Specific Languages + +Forth's defining feature is that you build the language up to your problem. +An analytics team doesn't write Forth — they write _their DSL_, which +happens to be implemented in Forth: + +```forth +\ Define a mini analytics vocabulary +: COLUMN ( col# -- addr n ) table-base SWAP col-offset + col-length ; +: SUM ( addr n -- total ) 0 ROT ROT 0 DO OVER I CELLS + @ + LOOP NIP ; +: COUNT ( addr n -- n ) NIP ; +: AVG ( addr n -- avg ) 2DUP SUM -ROT COUNT / ; +: WHERE> ( addr n thresh -- addr' n' ) filter-gt ; + +\ The analyst writes: +3 COLUMN 1000 WHERE> AVG . +\ "Average of column 3 where values exceed 1000" +``` + +The DSL compiles to WASM through WAFER's IR pipeline. There is no +interpreter overhead at query time. The analyst's vocabulary _is_ the +optimized code. + +### A Different Way to Look at It + +Most languages treat the absence of named variables as a limitation. But in +data pipelines, it can actually be a **feature**. Named intermediates create +coupling points — places where code can refer to stale state, where +refactoring requires renaming, where parallelization requires dependency +analysis. Point-free composition through a stack sidesteps this whole class +of problems. The data is always _here_, on top of the stack, ready for the +next transformation. + +--- + +## 2. Database Engine: The Query VM You Already Have + +### Databases Already Think in Stacks + +SQLite — the most deployed database engine in the world — executes queries +through the VDBE (Virtual Database Engine), a stack-based bytecode virtual +machine. When you write `SELECT * FROM users WHERE age > 30`, SQLite's query +planner compiles it into a sequence of stack operations: open cursor, seek, +compare, jump, emit row. + +PostgreSQL's executor runs a tree of plan nodes, each of which pushes tuples +upward. MySQL's handler interface is a stack of operations. 
CockroachDB +compiles SQL to a vectorized execution engine that operates on batches — but +the control flow is still a stack of operators. + +There's a pattern here: **query execution engines tend to converge on +stack machines**. Forth just happens to already be one, with no extra +abstraction layers in between. + +### Query Plans as Forth Programs + +A SQL query plan is a tree. Flattened into execution order, it becomes a +sequence of operations — which is exactly a Forth program: + +```sql +SELECT name, salary FROM employees WHERE dept = 'ENG' AND salary > 100000; +``` + +The query plan, expressed as Forth: + +```forth +\ Primitives provided by the storage engine +\ SCAN ( table -- cursor ) +\ NEXT-ROW ( cursor -- cursor flag ) flag=true if row available +\ COL@ ( cursor col# -- value ) +\ EMIT-ROW ( v1 v2 -- ) send to result set +\ CLOSE ( cursor -- ) + +: MATCH-DEPT? ( cursor -- cursor flag ) DUP 2 COL@ S" ENG" COMPARE 0= ; +: MATCH-SAL? ( cursor -- cursor flag ) DUP 3 COL@ 100000 > ; +: PROJECT ( cursor -- ) DUP 0 COL@ OVER 3 COL@ EMIT-ROW ; + +: QUERY ( -- ) + employees SCAN + BEGIN + NEXT-ROW + WHILE + MATCH-DEPT? IF + MATCH-SAL? IF + PROJECT + THEN + THEN + REPEAT + CLOSE +; +``` + +This isn't just pseudocode, either. Every word here could be a real WAFER +word backed by storage primitives implemented as host functions. The query +compiles through WAFER's IR pipeline to native WASM, with the same +optimization opportunities as any other Forth word: inlining, constant +folding, dead code elimination. + +### EVALUATE as Dynamic Query Compilation + +SQL databases accept queries as strings and compile them at runtime. Forth +has `EVALUATE`, which does exactly the same thing — takes a string and +compiles/executes it: + +```forth +\ Build a query string dynamically +S" employees SCAN BEGIN NEXT-ROW WHILE MATCH-DEPT? IF PROJECT THEN REPEAT CLOSE" +EVALUATE +``` + +The difference from SQL: the "query language" and the "implementation +language" are the same. 
There is no impedance mismatch between the language +the user writes queries in and the language the engine executes them in. A +user-defined function is just another word. An index lookup is just another +word. A join strategy is just another word. They all compose the same way. + +### Linear Memory as Storage Pages + +WAFER's linear memory model maps directly to how databases manage storage. +A database page is a fixed-size block of bytes at a known offset — exactly +what Forth's `@` and `!` operate on. B-tree nodes are structures in linear +memory traversed by pointer arithmetic: + +```forth +\ B-tree node layout: +\ +0: key count (cell) +\ +4: is-leaf flag (cell) +\ +8: keys array (key-count cells) +\ +8+4*key-count: child pointers (key-count+1 cells) + +: NODE-KEYS ( node -- addr ) 8 + ; +: NODE-KEY@ ( node i -- key ) CELLS SWAP NODE-KEYS + @ ; +: NODE-CHILD@ ( node i -- child ) + OVER NODE-KEYS + OVER @ CELLS + \ skip past keys array + SWAP CELLS + 4 + \ index into children + @ +; + +: BTREE-SEARCH ( node target-key -- addr|0 ) + OVER @ 0= IF 2DROP 0 EXIT THEN \ empty node + OVER 4 + @ IF \ leaf node + LEAF-SEARCH + ELSE + INTERNAL-SEARCH \ recurse into child + THEN +; +``` + +### WASM Sandboxing for User-Defined Functions + +Safely executing user-defined functions (UDFs) is one of the trickier +problems in database engines. PostgreSQL UDFs in C can crash the server. +JavaScript UDFs require embedding V8. Python UDFs tend to be slow. + +WAFER UDFs compile to WASM and execute in a sandbox with bounded memory, +bounded execution time, and no access to anything outside the linear memory +they're given. A malicious UDF can't read other users' data, can't make +network calls, can't crash the host. WAFER gets this for free — it's +inherent to WASM's security model. 
+ +```forth +\ User defines a custom scoring function +: SCORE ( age salary -- score ) + 1000 / \ salary contribution (salary/1000) + SWAP 50 - ABS \ age penalty (distance from 50) + - \ final score +; + +\ Engine uses it in a query +: RANKED-QUERY ( -- ) + employees SCAN + BEGIN NEXT-ROW WHILE + DUP 1 COL@ OVER 3 COL@ SCORE + 50 > IF PROJECT THEN + REPEAT CLOSE +; +``` + +The `SCORE` function compiles to a WASM module through WAFER's JIT. It runs +at near-native speed, sandboxed, with no FFI overhead. + +### A Different Way to Look at It + +Database engineers put a lot of effort into building query VMs — designing +bytecode formats, writing interpreters, adding JIT compilation. In a sense, +they're often reinventing something Forth-shaped each time. It's worth asking: +what if you just started with Forth and built the storage layer underneath it? + +--- + +## 3. AI Inference: Neural Networks as Word Composition + +### Layers Are Words, Forward Pass Is Composition + +A neural network's forward pass is a pipeline: input tensor enters, passes +through a sequence of layers (linear transform, activation, normalization), +and a prediction exits. Each layer takes a tensor and produces a tensor. + +In Forth terms: each layer is a word. The tensor sits on the stack. The +forward pass is the composition of those words: + +```forth +\ Assuming tensor operations as primitives (host functions): +\ T-MATMUL ( tensor weights -- tensor ) +\ T-ADD ( tensor bias -- tensor ) +\ T-RELU ( tensor -- tensor ) +\ T-SOFTMAX ( tensor -- tensor ) + +: LINEAR1 ( tensor -- tensor ) w1 T-MATMUL b1 T-ADD ; +: LINEAR2 ( tensor -- tensor ) w2 T-MATMUL b2 T-ADD ; +: LINEAR3 ( tensor -- tensor ) w3 T-MATMUL b3 T-ADD ; + +: CLASSIFIER ( tensor -- tensor ) + LINEAR1 T-RELU + LINEAR2 T-RELU + LINEAR3 T-SOFTMAX +; + +input-data CLASSIFIER \ forward pass +``` + +This maps more directly than you might expect. 
The compositional structure of +neural networks lines up nicely with the compositional structure of Forth +programs. The stack carries the data flow. The words are the layers. The +dictionary holds the model architecture. + +### Quantized Inference on the Integer Stack + +Most production inference runs quantized — INT8 or INT4 weights, integer +arithmetic, no floating point. Forth's native data type is the integer cell. +WAFER's `i32` stack operations map directly to quantized tensor operations: + +```forth +\ INT8 quantized dot product of two vectors +: QDOT ( addr1 addr2 n -- result ) + 0 >R \ accumulator on return stack + 0 DO + OVER I + C@ 127 - \ load and de-bias first element + OVER I + C@ 127 - \ load and de-bias second element + * R> + >R \ multiply-accumulate + LOOP + 2DROP R> +; + +\ Quantized linear layer +: QLINEAR ( input-addr weight-addr rows cols -- output-addr ) + \ For each output neuron, compute QDOT with input + output-buf >R + 0 DO + 2DUP I row-offset + SWAP QDOT + R@ I CELLS + ! + LOOP + 2DROP R> +; +``` + +No framework dependency, no Python interpreter, no CUDA runtime — just +integer arithmetic on a stack, compiled to WASM, running on any device. + +### Edge AI: The 50 KB Runtime + +ML inference frameworks tend to be big. PyTorch is ~500 MB. TensorFlow Lite +is ~1 MB for the runtime alone. ONNX Runtime is ~10 MB. + +WAFER is ~50 KB for the full Forth system. The model weights dominate the +binary size, not the runtime. For edge devices — IoT sensors, wearables, +microcontrollers, browser tabs — that size difference can be the difference +between "fits" and "doesn't fit." + +WASM's portability means the same inference code runs on an ARM +microcontroller, in a browser, on a server, without recompilation. Write the +model once in Forth, deploy everywhere WASM reaches. + +### DOES> for Architecture Generation + +Forth's `DOES>` is a metaprogramming facility: it creates words that create +other words, each with custom runtime behavior. 
This is exactly what neural +architecture construction needs: + +```forth +\ LAYER is a defining word that creates layer words +: LAYER ( weights bias rows cols -- ) + CREATE , , , , \ store dimensions and pointers + DOES> ( tensor -- tensor ) + DUP >R \ save parameter field address + R@ @ R@ 4 + @ \ get cols, rows + R@ 8 + @ \ get weights address + T-MATMUL + R> 12 + @ \ get bias address + T-ADD +; + +\ Define the network architecture +w1 b1 768 512 LAYER EMBED +w2 b2 512 256 LAYER HIDDEN1 +w3 b3 256 10 LAYER OUTPUT + +\ The architecture is now executable +: MODEL ( tensor -- tensor ) EMBED T-RELU HIDDEN1 T-RELU OUTPUT T-SOFTMAX ; +``` + +Each `LAYER` invocation creates a new word with its own weights and +dimensions baked in. The `MODEL` word composes them. This is the same +pattern as `nn.Sequential` in PyTorch — but it compiles to WASM, has zero +framework overhead, and the "architecture definition" and the "executable +model" are the same thing. + +### Automatic Differentiation via Dual Numbers + +Backpropagation is reverse-mode automatic differentiation. There is an +elegant formulation using dual numbers (a value paired with its derivative) +that maps to Forth's double-cell operations: + +```forth +\ A dual number is a pair ( value derivative ) stored as a double cell +\ WAFER's double-cell words (D+, D-, D*, 2DUP, etc.) operate on these natively + +\ Dual addition: (a, a') + (b, b') = (a+b, a'+b') +: D+DUAL ( a a' b b' -- a+b a'+b' ) + ROT + \ a' + b' + >R + R> \ a + b, then restore derivative +; + +\ Dual multiplication: (a, a') * (b, b') = (a*b, a*b' + a'*b) +: D*DUAL ( a a' b b' -- a*b a*b'+a'*b ) + 3 PICK * \ a * b' + >R + ROT * \ a' * b + R> + \ a*b' + a'*b = derivative + >R + * \ a * b = value + R> +; +``` + +The chain rule emerges naturally: composing dual-number operations through a +sequence of words automatically computes the derivative of the whole +pipeline. This is the same principle behind JAX's `jvp` — but expressed as +stack operations. 
+ +### A Different Way to Look at It + +Most of the ML ecosystem's complexity lives in _training_. Inference, by +comparison, is fairly straightforward: load weights, multiply matrices, apply +activations, read output. That's a pipeline of arithmetic operations — which +is pretty much what Forth was designed for. The industry tends to wrap +inference in 500 MB frameworks because training needed those frameworks, and +the two haven't been fully separated. A 50 KB Forth runtime doing quantized +integer operations might be closer to what inference actually needs than we +usually assume. + +--- + +## 4. AI Generating Code: The Smallest Target Language + +### The Token Economy + +When an LLM generates code, every token costs money and adds latency. A +Python solution to "compute the average of a list" looks like: + +```python +def average(numbers): + if not numbers: + return 0 + return sum(numbers) / len(numbers) +``` + +That is 25 tokens. The Forth equivalent: + +```forth +: AVERAGE ( addr n -- avg ) 2DUP SUM -ROT NIP / ; +``` + +That is 12 tokens. For the same semantic content, Forth uses roughly half +the tokens. At scale — millions of API calls, each generating hundreds of +lines — this is a meaningful cost reduction. But the token savings are the +least interesting advantage. + +### Minimal Syntax, Maximal Verifiability + +Forth has essentially no syntax. There are words separated by spaces. There +are numbers. There are a few special constructs (`:` for definitions, `IF` +/`THEN` for conditionals, `DO`/`LOOP` for iteration). That's about it. + +An LLM generating Python must get indentation right, match parentheses and +brackets, handle keyword arguments, manage import statements, respect method +resolution order, and navigate a standard library of thousands of functions. +An LLM generating Forth mostly just needs to get the stack effect right. +That's the main failure mode worth worrying about. 
+ +And stack effects are **mechanically verifiable**: + +```forth +\ Stack effect: ( n1 n2 -- n3 ) +\ Verification: start with 2 items on stack, end with 1 +: ADD-AND-DOUBLE ( n1 n2 -- n3 ) + 2* ; + +\ Test: +3 4 ADD-AND-DOUBLE \ stack should contain: 14 +``` + +You don't need a type checker or static analysis. Just run the word with +known inputs and check the stack. If the stack depth and values match the +declared effect, the word is correct. It's hard to think of another practical +language where verification is this straightforward. + +### Self-Extending Vocabulary + +LLMs struggle with large codebases because context windows are finite. A +Python project with 50 files and 10,000 lines requires the LLM to hold (or +retrieve) vast amounts of context to generate correct code. + +Forth's defining characteristic is that you build the language up to your +problem. The LLM doesn't need to generate a 100-line solution. It generates +5-line words, each building on the previous ones: + +```forth +\ Step 1: LLM generates basic operations +: CLAMP ( n lo hi -- n' ) ROT MIN MAX ; +: BETWEEN? ( n lo hi -- flag ) OVER - >R - R> U< ; + +\ Step 2: LLM generates higher-level operations using step 1 +: NORMALIZE ( n -- n' ) 0 255 CLAMP ; +: IN-RANGE? ( n -- flag ) 0 100 BETWEEN? ; + +\ Step 3: LLM generates application logic using steps 1-2 +: PROCESS-SENSOR ( raw -- calibrated ) + offset @ - \ remove sensor offset + NORMALIZE \ clamp to valid range + scale @ * 1000 / \ apply calibration scale +; +``` + +Each step requires only the _names_ of previously defined words, not their +implementations. The dictionary serves as a compressed representation of the +entire program. An LLM can generate correct code by knowing only the word +names and their stack effects — a few dozen tokens of context instead of +thousands of lines. + +### WASM Sandbox: Safe Execution of Untrusted Code + +AI-generated code generally needs to be executed to be verified. 
Running +arbitrary Python is tricky from a security perspective — file system access, +network calls, `import os`, `eval()`. Sandboxing Python typically requires +containerization, seccomp filters, or virtual machines. + +WAFER compiles to WASM, which executes in a sandbox by construction. A +WAFER program: + +- Cannot access the file system +- Cannot make network calls +- Cannot read memory outside its linear memory +- Cannot execute longer than the host allows (fuel metering) +- Cannot consume more memory than the host allocates + +You can run AI-generated Forth with roughly the same confidence as a pure +mathematical function. The sandbox isn't a bolt-on — it's just how WASM +works. + +```forth +\ AI generates this code. Is it safe to run? Yes, always. +: FIBONACCI ( n -- fib ) + DUP 2 < IF EXIT THEN + DUP 1- RECURSE + SWAP 2 - RECURSE + + +; +``` + +There's nothing this word can do except compute. No side effects, no +escape hatches. The WASM sandbox guarantees that structurally. + +### A Different Way to Look at It + +The conventional wisdom is that LLMs need expressive, high-level languages +to generate useful code. But there's a good case for the opposite: what LLMs +really benefit from are **verifiable** languages — ones where correctness can +be checked cheaply and deterministically. Expressiveness can actually work +against you here: more syntax means more ways to be wrong, more edge cases +to handle, more context to maintain. Forth's extreme minimalism starts to +look less like a limitation and more like an advantage: generate a few small +words, verify each one by running it, compose them into larger programs with +confidence. The language that's hardest for humans to read might just be the +easiest for machines to write correctly. + +--- + +## 5. AI Agent Control: Plans That Execute Themselves + +### The Plan-Program Gap + +When an AI agent "plans," it produces a sequence of steps in natural +language: + +> 1. Search for files matching "*.config" +> 2. 
Read each file and extract the "timeout" field +> 3. If timeout > 30, update it to 30 +> 4. Write the modified files back + +This plan is then "executed" by the agent interpreting each step, calling +tools, handling errors, and managing state — all mediated by the LLM at +every step, consuming tokens and latency for what is fundamentally a +sequential program. + +The gap between "plan" and "program" might be more artificial than it looks. +A plan _is_ a program — we just don't usually give agents a good executable +representation for it. + +Forth could be that representation. + +### Tools as Words + +Every agent tool — file read, web search, code execution, API call — maps +to a Forth word. The agent's toolkit becomes a Forth dictionary: + +```forth +\ Agent tool vocabulary (host functions) +\ SEARCH-FILES ( pattern-addr pattern-len -- results-addr count ) +\ READ-FILE ( path-addr path-len -- content-addr content-len ) +\ WRITE-FILE ( content-addr content-len path-addr path-len -- ) +\ JSON-GET ( json-addr key-addr key-len -- value-addr value-len ) +\ SHELL ( cmd-addr cmd-len -- output-addr output-len ) +\ ASK-USER ( question-addr question-len -- answer-addr answer-len ) +``` + +Now the plan from above becomes an executable program: + +```forth +: UPDATE-TIMEOUTS ( -- ) + S" *.config" SEARCH-FILES \ get matching files + 0 DO \ for each file + DUP I CELLS + @ COUNT \ get filename + 2DUP READ-FILE \ read contents + S" timeout" JSON-GET \ extract timeout field + S>NUMBER DROP \ convert to number + 30 > IF \ if timeout > 30 + 30 SET-TIMEOUT \ update to 30 + WRITE-FILE \ write back + ELSE + 2DROP \ discard unchanged + THEN + LOOP + DROP +; + +UPDATE-TIMEOUTS +``` + +This isn't a description of what to do — it _is_ what to do. The agent +generates it, WAFER compiles it to WASM, and it runs — no LLM in the loop +during execution, no token cost per step, no latency per tool call. + +### Error Handling with CATCH/THROW + +Of course, agent plans fail. Files don't exist. 
APIs return errors. +Permissions get denied. Production agent systems need robust error handling, +which typically means calling the LLM at every step to decide what to do +when something goes wrong. + +WAFER has `CATCH` and `THROW` — structured exception handling that lets +the plan itself define error recovery: + +```forth +: SAFE-READ ( path-addr path-len -- content-addr content-len | 0 0 ) + ['] READ-FILE CATCH IF + 2DROP 0 0 \ file not found: return empty + THEN +; + +: SAFE-UPDATE ( filename-addr filename-len -- ) + 2DUP SAFE-READ \ try to read + DUP 0= IF 2DROP 2DROP EXIT THEN \ skip if file missing + S" timeout" JSON-GET + S>NUMBER DROP + 30 > IF + 30 SET-TIMEOUT + WRITE-FILE + ELSE + 2DROP 2DROP + THEN +; + +: ROBUST-UPDATE-TIMEOUTS ( -- ) + S" *.config" SEARCH-FILES + 0 DO + DUP I CELLS + @ COUNT SAFE-UPDATE + LOOP + DROP +; +``` + +The error handling is part of the plan. The agent generates it once, and it +runs to completion without further LLM intervention. Errors are handled at +the speed of WASM, not the speed of an API call to an LLM. + +### The Dictionary as Growing Capability + +A human Forth programmer builds up vocabulary: small words compose into +larger words, which compose into still larger words. The dictionary grows +with the programmer's understanding of the problem. + +An AI agent does the same thing. Each successfully executed plan leaves +behind defined words that can be reused: + +```forth +\ First task: agent learns to read configs +: READ-CONFIG ( path-addr path-len -- json-addr json-len ) + SAFE-READ DUP 0= IF EXIT THEN JSON-PARSE ; + +\ Second task: agent learns to update configs +: UPDATE-CONFIG ( key-addr key-len value path-addr path-len -- ) + 2DUP READ-CONFIG JSON-SET WRITE-FILE ; + +\ Third task: agent composes previous capabilities +: MIGRATE-CONFIGS ( -- ) + S" *.config" SEARCH-FILES + 0 DO + DUP I CELLS + @ COUNT + S" timeout" 30 ROT ROT UPDATE-CONFIG + LOOP DROP +; + +\ The agent's vocabulary grows with experience. 
+\ MIGRATE-CONFIGS didn't exist before. Now it does. +\ Next time, the agent can use it as a building block. +``` + +You could call this _learned tool use_ — not in the machine learning sense, +but in the software engineering sense. The agent defines new capabilities in +terms of old ones, and the dictionary persists across invocations. Over time, +the agent's vocabulary naturally converges on the abstractions that matter +for its operational domain. + +### REPL as Test-Before-Commit + +Agents that act irreversibly on the first try are risky. WAFER's REPL model +gives agents a natural test-before-commit workflow: + +1. **Define**: Generate and compile the plan as Forth words. +2. **Test**: Run the words against sample data on the stack. +3. **Verify**: Check the stack for expected results. +4. **Execute**: Run the plan for real only after verification passes. + +```forth +\ Step 1: Define +: CALCULATE-DISCOUNT ( price tier -- discounted ) + CASE + 1 OF 10 ENDOF \ tier 1: 10% off + 2 OF 20 ENDOF \ tier 2: 20% off + 3 OF 35 ENDOF \ tier 3: 35% off + 0 SWAP + ENDCASE + 100 SWAP - * 100 / +; + +\ Step 2: Test (no side effects, just stack operations) +1000 1 CALCULATE-DISCOUNT . \ expect 900 +1000 2 CALCULATE-DISCOUNT . \ expect 800 +1000 3 CALCULATE-DISCOUNT . \ expect 650 + +\ Step 3: Verify output matches expectations +\ Step 4: Apply to real data only after tests pass +``` + +The agent can generate, test, and iterate without ever touching production +data. The REPL isn't just a debugging convenience here — it's a safety mechanism +for autonomous agents. + +### Multi-Agent Coordination + +Multiple agents can share a WAFER dictionary through shared linear memory. +One agent defines words. Another agent uses them. 
A coordinator agent +composes them into higher-level plans: + +```forth +\ Agent A defines data retrieval +: FETCH-METRICS ( -- addr n ) metrics-api QUERY PARSE-JSON ; + +\ Agent B defines analysis +: DETECT-ANOMALIES ( addr n -- anomalies-addr n ) + THRESHOLD @ FILTER-ABOVE ; + +\ Agent C defines actions +: ALERT ( anomalies-addr n -- ) + 0 DO DUP I CELLS + @ SEND-ALERT LOOP DROP ; + +\ Coordinator composes them +: MONITOR ( -- ) + BEGIN + FETCH-METRICS DETECT-ANOMALIES + DUP 0> IF ALERT ELSE DROP THEN + 60000 DELAY + AGAIN +; +``` + +Each agent contributes words to a shared vocabulary. The coordinator doesn't +need to understand the implementation of `FETCH-METRICS` or +`DETECT-ANOMALIES` — it only needs to know their stack effects. This is +composability without coupling, coordination without shared state beyond +the dictionary. + +### A Different Way to Look at It + +The AI agent community is building increasingly sophisticated "plan +representations" — DAGs, state machines, behavior trees, ReAct loops — all +trying to bridge the gap between the LLM's natural language output and +actual tool execution. But Forth is already a plan representation that +doubles as an execution engine. It has structured control flow (`IF`/`THEN`, +`DO`/`LOOP`, `BEGIN`/`UNTIL`), error handling (`CATCH`/`THROW`), +composability (word definitions), and a test harness (the REPL and stack). +Maybe the gap between "plan" and "program" doesn't need to be bridged so +much as it needs to be _erased_. 
+ +--- + +## Convergence: Five Problems, One Shape + +These five domains look different on the surface: + +| Domain | Traditional Tool | Core Operation | +| --------------- | ------------------------------ | -------------------- | +| Data analytics | Pandas, Spark | Transform pipeline | +| Database engine | SQLite VDBE, Postgres executor | Query plan execution | +| AI inference | PyTorch, TensorFlow | Layer composition | +| AI codegen | Python, JavaScript | Program synthesis | +| AI agents | LangChain, CrewAI | Plan execution | + +But they share a deep structure: **sequential composition of simple +operations on a data flow**. A data pipeline, a query plan, a forward +pass, a synthesized program, and an agent plan are all the same thing: +a sequence of words applied to a stack. + +Forth noticed this in 1970. Charles Moore designed a language around the +observation that most computation is a pipeline of transformations, and +the simplest way to express pipelines is sequential composition on a +stack. The language has no syntax because pipelines don't need syntax. +It has no type system because the data flow _is_ the type. It has no +package manager because each program builds its own vocabulary from +primitives. + +WAFER brings these ideas to the modern world by targeting WebAssembly — the +universal runtime that runs in browsers, on servers, on edge devices, in +sandboxes. That combination opens up some interesting possibilities: + +- **Analytics in the browser** with no server, no framework, deterministic + execution. +- **Database VMs** that compile queries to native WASM through an existing + Forth JIT. +- **Inference engines** that fit in 50 KB and run on any device WASM + reaches. +- **AI-generated code** in the language with the smallest syntax, cheapest + verification, and safest sandbox. +- **Agent plans** that are executable programs, testable in a REPL, + composable through a growing dictionary. + +None of this requires Forth to change. 
Forth has been this shape for 55 +years. It's kind of fun that the world's problems seem to be circling back +to it. + +--- + +_WAFER is open source. Start at the [repository root](../README.md)._ +_Architecture details: [WAFER.md](WAFER.md). Language introduction: +[FORTH.md](FORTH.md)._