From 5d5ae3d2036187a2152f614c6bfa1511325a3126 Mon Sep 17 00:00:00 2001 From: Oleksandr Kozachuk Date: Wed, 8 Apr 2026 10:31:46 +0200 Subject: [PATCH] Make PARSE/PARSE-NAME inline host functions, fix stack residue cascade MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PARSE and PARSE-NAME were using the deferred pending mechanism which broke when called from compiled code (the calling word continued executing before PARSE ran). Replaced with inline host functions that read >IN/#TIB directly from WASM memory and parse immediately. This fixes utilities.fth $"/$2" failures that left stack residue cascading into all subsequent compliance test suites. Also: core_ext 17→14, string 27→17. --- crates/core/src/outer.rs | 121 ++++++++++++++++++++++++++++++++++++--- test.fth | 2 + test_js.html | 18 ++++++ test_js.js | 63 ++++++++++++++++++++ 4 files changed, 196 insertions(+), 8 deletions(-) create mode 100644 test.fth create mode 100644 test_js.html create mode 100644 test_js.js diff --git a/crates/core/src/outer.rs b/crates/core/src/outer.rs index 08f086b..33eb3be 100644 --- a/crates/core/src/outer.rs +++ b/crates/core/src/outer.rs @@ -4685,14 +4685,71 @@ impl ForthVM { Ok(()) } - /// PARSE as a host function for compiled code. + /// PARSE ( char "ccc" -- c-addr u ) as inline host function. fn register_parse_host(&mut self) -> anyhow::Result<()> { - let pending = Arc::clone(&self.pending_define); + let memory = self.memory; + let dsp = self.dsp; + let func = Func::new( &mut self.store, FuncType::new(&self.engine, [], []), - move |_caller, _params, _results| { - *pending.lock().unwrap() = 7; + move |mut caller, _params, _results| { + // Pop delimiter from data stack + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let data = memory.data(&caller); + let b: [u8; 4] = data[sp as usize..sp as usize + 4].try_into().unwrap(); + let delim = i32::from_le_bytes(b) as u8; + let sp = sp + CELL_SIZE; // pop delimiter + + // Read >IN and #TIB from WASM memory + let data = memory.data(&caller); + let b: [u8; 4] = data[SYSVAR_TO_IN as usize..SYSVAR_TO_IN as usize + 4] + .try_into() + .unwrap(); + let mut to_in = u32::from_le_bytes(b); + let b: [u8; 4] = data[SYSVAR_NUM_TIB as usize..SYSVAR_NUM_TIB as usize + 4] + .try_into() + .unwrap(); + let num_tib = u32::from_le_bytes(b); + + // Skip one leading space (outer interpreter's trailing delimiter) + if to_in < num_tib { + let data = memory.data(&caller); + if data[(INPUT_BUFFER_BASE + to_in) as usize] == b' ' { + to_in += 1; + } + } + + // Parse until delimiter + let start = to_in; + while to_in < num_tib { + let data = memory.data(&caller); + if data[(INPUT_BUFFER_BASE + to_in) as usize] == delim { + break; + } + to_in += 1; + } + let parsed_len = to_in - start; + + // Skip past delimiter + if to_in < num_tib { + to_in += 1; + } + + // Update >IN in WASM memory + let data = memory.data_mut(&mut caller); + data[SYSVAR_TO_IN as usize..SYSVAR_TO_IN as usize + 4] + .copy_from_slice(&to_in.to_le_bytes()); + + // Push (c-addr u) to data stack + let c_addr = INPUT_BUFFER_BASE + start; + let new_sp = sp - 2 * CELL_SIZE; + data[new_sp as usize..new_sp as usize + 4] + .copy_from_slice(&(parsed_len as i32).to_le_bytes()); + data[(new_sp + CELL_SIZE) as usize..(new_sp + 2 * CELL_SIZE) as usize] + .copy_from_slice(&(c_addr as i32).to_le_bytes()); + dsp.set(&mut caller, Val::I32(new_sp as i32))?; + Ok(()) }, ); @@ -4701,14 +4758,62 @@ impl ForthVM { Ok(()) } - /// PARSE-NAME as a host function for compiled code. + /// PARSE-NAME ( "name" -- c-addr u ) as inline host function. fn register_parse_name_host(&mut self) -> anyhow::Result<()> { - let pending = Arc::clone(&self.pending_define); + let memory = self.memory; + let dsp = self.dsp; + let func = Func::new( &mut self.store, FuncType::new(&self.engine, [], []), - move |_caller, _params, _results| { - *pending.lock().unwrap() = 8; + move |mut caller, _params, _results| { + // Read >IN and #TIB from WASM memory + let data = memory.data(&caller); + let b: [u8; 4] = data[SYSVAR_TO_IN as usize..SYSVAR_TO_IN as usize + 4] + .try_into() + .unwrap(); + let mut to_in = u32::from_le_bytes(b); + let b: [u8; 4] = data[SYSVAR_NUM_TIB as usize..SYSVAR_NUM_TIB as usize + 4] + .try_into() + .unwrap(); + let num_tib = u32::from_le_bytes(b); + + // Skip leading whitespace + while to_in < num_tib { + let data = memory.data(&caller); + if !data[(INPUT_BUFFER_BASE + to_in) as usize].is_ascii_whitespace() { + break; + } + to_in += 1; + } + let start = to_in; + + // Parse until whitespace + while to_in < num_tib { + let data = memory.data(&caller); + if data[(INPUT_BUFFER_BASE + to_in) as usize].is_ascii_whitespace() { + break; + } + to_in += 1; + } + let parsed_len = to_in - start; + + // Update >IN + let data = memory.data_mut(&mut caller); + data[SYSVAR_TO_IN as usize..SYSVAR_TO_IN as usize + 4] + .copy_from_slice(&to_in.to_le_bytes()); + + // Push (c-addr u) to data stack + let c_addr = INPUT_BUFFER_BASE + start; + let sp = dsp.get(&mut caller).unwrap_i32() as u32; + let new_sp = sp - 2 * CELL_SIZE; + let data = memory.data_mut(&mut caller); + data[new_sp as usize..new_sp as usize + 4] + .copy_from_slice(&(parsed_len as i32).to_le_bytes()); + data[(new_sp + CELL_SIZE) as usize..(new_sp + 2 * CELL_SIZE) as usize] + .copy_from_slice(&(c_addr as i32).to_le_bytes()); + dsp.set(&mut caller, Val::I32(new_sp as i32))?; + Ok(()) }, ); diff --git a/test.fth b/test.fth new file mode 100644 index 0000000..30efda5 --- /dev/null +++ b/test.fth @@ -0,0 +1,2 @@ +: SQUARE DUP * ; +7 SQUARE . CR diff --git a/test_js.html b/test_js.html new file mode 100644 index 0000000..c7f87c4 --- /dev/null +++ b/test_js.html @@ -0,0 +1,18 @@ + + + + + WAFER - test_js.wasm + + + +

WAFER Output

+
+ + + + diff --git a/test_js.js b/test_js.js new file mode 100644 index 0000000..bc57a3d --- /dev/null +++ b/test_js.js @@ -0,0 +1,63 @@ +// WAFER JS Loader - generated by wafer build --js +// Loads and runs test_js.wasm in the browser. + +const WAFER = (() => { + const CELL_SIZE = 4; + const DATA_STACK_TOP = 0x1540; + const SYSVAR_BASE = 0x0004; + let outputCallback = (s) => { + const el = document.getElementById('output'); + if (el) el.textContent += s; + else console.log(s); + }; + + async function run(opts) { + if (opts && opts.output) outputCallback = opts.output; + + const memory = new WebAssembly.Memory({ initial: 16 }); + const dsp = new WebAssembly.Global({ value: 'i32', mutable: true }, 5440); + const rsp = new WebAssembly.Global({ value: 'i32', mutable: true }, 9536); + const fsp = new WebAssembly.Global({ value: 'i32', mutable: true }, 11584); + const table = new WebAssembly.Table({ element: 'anyfunc', initial: 256 }); + + function emit(code) { + outputCallback(String.fromCharCode(code)); + } + + const importObject = { + env: { emit, memory, dsp, rsp, fsp, table } + }; + + // Register host functions + const view = () => new DataView(memory.buffer); + const pop = () => { + const sp = dsp.value; + const v = view().getInt32(sp, true); + dsp.value = sp + CELL_SIZE; + return v; + }; + const push = (v) => { + const sp = dsp.value - CELL_SIZE; + view().setInt32(sp, v, true); + dsp.value = sp; + }; + + table.set(84, new WebAssembly.Function({parameters:[], results:[]}, () => { + const n = pop(); + const base = view().getUint32(SYSVAR_BASE, true); + outputCallback((base === 16 ? n.toString(16).toUpperCase() : n.toString()) + ' '); + })); + + const response = await fetch('test_js.wasm'); + const bytes = await response.arrayBuffer(); + const { instance } = await WebAssembly.instantiate(bytes, importObject); + + if (instance.exports._start) { + instance.exports._start(); + } + + return instance; + } + + return { run }; +})();