Fix LEAVE+LOOP hang, DEPTH off-by-one, division flavor, EVALUATE, WORD, ACCEPT

Six fixes for compliance test regressions introduced in Phases 7-8:

- LEAVE + +LOOP with step=0 caused infinite loop: the XOR termination
  check yields 0 when index=limit and step=0. Added SYSVAR_LEAVE_FLAG
  mechanism — LEAVE sets flag, +LOOP checks it, all loops clear on exit.

- DEPTH was off-by-one: `5440 SP@ -` pushed the literal before SP@
  read the stack pointer, making SP@ see one extra cell. Reordered to
  `SP@ 5440 SWAP -` so SP@ reads dsp before any literal push.

- */ and */MOD used FM/MOD (floored) but WAFER's / uses WASM i32.div_s
  (symmetric). Changed to SM/REM for consistency.

- EVALUATE didn't sync input buffer to WASM memory, breaking SOURCE
  and >IN manipulation inside evaluated strings. Added input-only sync
  (without touching STATE/BASE) and >IN readback after each token.

- WORD didn't skip leading spaces when delimiter != space, causing
  GN' and GS3 tests to read whitespace instead of content.

- Added ACCEPT stub returning 0 for non-interactive mode.

- Added bounds check in refresh_user_here to reject corrupted
  SYSVAR_HERE values beyond WASM memory size.

Core and Facility compliance suites now pass. Other suites have
pre-existing regressions from Phases 1-8 still under investigation.
This commit is contained in:
2026-04-07 20:30:16 +02:00
parent d0991c58f6
commit 8f2c70e6f4
4 changed files with 142 additions and 20 deletions
+5 -3
View File
@@ -7,8 +7,9 @@
\ --------------------------------------------------------------- \ ---------------------------------------------------------------
\ DEPTH ( -- n ) number of items on the data stack \ DEPTH ( -- n ) number of items on the data stack
\ SP@ must come first so it reads the dsp before DEPTH's own literal push.
\ DATA_STACK_TOP = 5440, uses arithmetic right shift for / 4 \ DATA_STACK_TOP = 5440, uses arithmetic right shift for / 4
: DEPTH 5440 SP@ - 2 RSHIFT ; : DEPTH SP@ 5440 SWAP - 2 RSHIFT ;
\ PICK ( xn..x0 n -- xn..x0 xn ) copy nth stack item \ PICK ( xn..x0 n -- xn..x0 xn ) copy nth stack item
: PICK 1+ CELLS SP@ + @ ; : PICK 1+ CELLS SP@ + @ ;
@@ -144,10 +145,11 @@
THEN ; THEN ;
\ */ ( n1 n2 n3 -- n4 ) n1*n2/n3 with double intermediate \ */ ( n1 n2 n3 -- n4 ) n1*n2/n3 with double intermediate
: */ >R M* R> FM/MOD SWAP DROP ; \ Must use SM/REM (symmetric) to match WAFER's WASM i32.div_s semantics.
: */ >R M* R> SM/REM SWAP DROP ;
\ */MOD ( n1 n2 n3 -- rem quot ) \ */MOD ( n1 n2 n3 -- rem quot )
: */MOD >R M* R> FM/MOD ; : */MOD >R M* R> SM/REM ;
\ --------------------------------------------------------------- \ ---------------------------------------------------------------
\ Phase 4: HERE and ALIGNED \ Phase 4: HERE and ALIGNED
+32 -6
View File
@@ -19,7 +19,7 @@ use wasm_encoder::{
use crate::dictionary::WordId; use crate::dictionary::WordId;
use crate::error::{WaferError, WaferResult}; use crate::error::{WaferError, WaferResult};
use crate::ir::IrOp; use crate::ir::IrOp;
use crate::memory::CELL_SIZE; use crate::memory::{CELL_SIZE, SYSVAR_LEAVE_FLAG};
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Import indices (order matters: imports numbered sequentially by kind) // Import indices (order matters: imports numbered sequentially by kind)
@@ -908,12 +908,22 @@ fn emit_do_loop(f: &mut Function, body: &[IrOp], is_plus_loop: bool, ctx: &EmitC
if is_plus_loop { if is_plus_loop {
// +LOOP: Forth 2012 termination check. // +LOOP: Forth 2012 termination check.
// Exit when (old_index - limit) XOR (new_index - limit) is negative. // Exit when (old_index - limit) XOR (new_index - limit) is negative,
// SCRATCH_BASE = old_index (from rpop) // or when the LEAVE flag is set (LEAVE sets index=limit, but +LOOP with
// SCRATCH_BASE+2 = step (from data stack) // step=0 would loop forever without this flag check).
f.instruction(&Instruction::LocalSet(SCRATCH_BASE)); f.instruction(&Instruction::LocalSet(SCRATCH_BASE));
pop_to(f, SCRATCH_BASE + 2); // step from data stack pop_to(f, SCRATCH_BASE + 2); // step from data stack
// Check leave flag first — if set, clear it and exit immediately
f.instruction(&Instruction::I32Const(SYSVAR_LEAVE_FLAG as i32))
.instruction(&Instruction::I32Load(MEM4))
.instruction(&Instruction::If(BlockType::Empty))
.instruction(&Instruction::I32Const(SYSVAR_LEAVE_FLAG as i32))
.instruction(&Instruction::I32Const(0))
.instruction(&Instruction::I32Store(MEM4))
.instruction(&Instruction::Br(2)) // exit: If(0) → Loop(1) → Block(2)
.instruction(&Instruction::End);
// Peek limit from return stack // Peek limit from return stack
rpeek(f); rpeek(f);
f.instruction(&Instruction::LocalSet(SCRATCH_BASE + 1)); f.instruction(&Instruction::LocalSet(SCRATCH_BASE + 1));
@@ -973,11 +983,14 @@ fn emit_do_loop(f: &mut Function, body: &[IrOp], is_plus_loop: bool, ctx: &EmitC
.instruction(&Instruction::End); // end block .instruction(&Instruction::End); // end block
} }
// Clean up: pop index and limit from return stack // Clean up: pop index and limit from return stack, clear leave flag
rpop(f); rpop(f);
f.instruction(&Instruction::Drop); f.instruction(&Instruction::Drop);
rpop(f); rpop(f);
f.instruction(&Instruction::Drop); f.instruction(&Instruction::Drop);
f.instruction(&Instruction::I32Const(SYSVAR_LEAVE_FLAG as i32))
.instruction(&Instruction::I32Const(0))
.instruction(&Instruction::I32Store(MEM4));
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
@@ -2020,6 +2033,16 @@ fn emit_consolidated_do_loop(
f.instruction(&Instruction::LocalSet(SCRATCH_BASE)); f.instruction(&Instruction::LocalSet(SCRATCH_BASE));
pop_to(f, SCRATCH_BASE + 2); // step from data stack pop_to(f, SCRATCH_BASE + 2); // step from data stack
// Check leave flag — if set, clear it and exit immediately
f.instruction(&Instruction::I32Const(SYSVAR_LEAVE_FLAG as i32))
.instruction(&Instruction::I32Load(MEM4))
.instruction(&Instruction::If(BlockType::Empty))
.instruction(&Instruction::I32Const(SYSVAR_LEAVE_FLAG as i32))
.instruction(&Instruction::I32Const(0))
.instruction(&Instruction::I32Store(MEM4))
.instruction(&Instruction::Br(2)) // exit: If(0) → Loop(1) → Block(2)
.instruction(&Instruction::End);
rpeek(f); rpeek(f);
f.instruction(&Instruction::LocalSet(SCRATCH_BASE + 1)); f.instruction(&Instruction::LocalSet(SCRATCH_BASE + 1));
@@ -2067,11 +2090,14 @@ fn emit_consolidated_do_loop(
.instruction(&Instruction::End); .instruction(&Instruction::End);
} }
// Clean up: pop index and limit from return stack // Clean up: pop index and limit from return stack, clear leave flag
rpop(f); rpop(f);
f.instruction(&Instruction::Drop); f.instruction(&Instruction::Drop);
rpop(f); rpop(f);
f.instruction(&Instruction::Drop); f.instruction(&Instruction::Drop);
f.instruction(&Instruction::I32Const(SYSVAR_LEAVE_FLAG as i32))
.instruction(&Instruction::I32Const(0))
.instruction(&Instruction::I32Store(MEM4));
} }
/// Optional extras for exportable modules (data section, entry point, metadata). /// Optional extras for exportable modules (data section, entry point, metadata).
+3
View File
@@ -86,6 +86,8 @@ pub const SYSVAR_SOURCE_ID: u32 = SYSVAR_BASE + 20;
pub const SYSVAR_NUM_TIB: u32 = SYSVAR_BASE + 24; pub const SYSVAR_NUM_TIB: u32 = SYSVAR_BASE + 24;
/// HLD: pointer for pictured numeric output. /// HLD: pointer for pictured numeric output.
pub const SYSVAR_HLD: u32 = SYSVAR_BASE + 28; pub const SYSVAR_HLD: u32 = SYSVAR_BASE + 28;
/// LEAVE flag: nonzero when LEAVE has been called inside a DO loop.
pub const SYSVAR_LEAVE_FLAG: u32 = SYSVAR_BASE + 32;
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
@@ -125,6 +127,7 @@ mod tests {
SYSVAR_SOURCE_ID, SYSVAR_SOURCE_ID,
SYSVAR_NUM_TIB, SYSVAR_NUM_TIB,
SYSVAR_HLD, SYSVAR_HLD,
SYSVAR_LEAVE_FLAG,
]; ];
for offset in all_offsets { for offset in all_offsets {
assert!(offset >= SYSVAR_BASE); assert!(offset >= SYSVAR_BASE);
+102 -11
View File
@@ -22,8 +22,8 @@ use crate::dictionary::{Dictionary, WordId};
use crate::ir::IrOp; use crate::ir::IrOp;
use crate::memory::{ use crate::memory::{
CELL_SIZE, DATA_STACK_TOP, FLOAT_SIZE, FLOAT_STACK_BASE, FLOAT_STACK_TOP, INPUT_BUFFER_BASE, CELL_SIZE, DATA_STACK_TOP, FLOAT_SIZE, FLOAT_STACK_BASE, FLOAT_STACK_TOP, INPUT_BUFFER_BASE,
INPUT_BUFFER_SIZE, RETURN_STACK_TOP, SYSVAR_BASE_VAR, SYSVAR_HERE, SYSVAR_NUM_TIB, INPUT_BUFFER_SIZE, RETURN_STACK_TOP, SYSVAR_BASE_VAR, SYSVAR_HERE, SYSVAR_LEAVE_FLAG,
SYSVAR_STATE, SYSVAR_TO_IN, SYSVAR_NUM_TIB, SYSVAR_STATE, SYSVAR_TO_IN,
}; };
use crate::optimizer::optimize; use crate::optimizer::optimize;
@@ -2333,7 +2333,8 @@ impl ForthVM {
} }
/// Register LEAVE as a host function. /// Register LEAVE as a host function.
/// Sets the loop index equal to the limit so the loop exits on next iteration. /// Sets the loop index equal to the limit and sets the leave flag
/// so the loop exits on the next +LOOP/LOOP check.
fn register_leave(&mut self) -> anyhow::Result<()> { fn register_leave(&mut self) -> anyhow::Result<()> {
let memory = self.memory; let memory = self.memory;
let rsp = self.rsp; let rsp = self.rsp;
@@ -2353,6 +2354,9 @@ impl ForthVM {
let data = memory.data_mut(&mut caller); let data = memory.data_mut(&mut caller);
let bytes = limit.to_le_bytes(); let bytes = limit.to_le_bytes();
data[index_addr..index_addr + 4].copy_from_slice(&bytes); data[index_addr..index_addr + 4].copy_from_slice(&bytes);
// Set leave flag so +LOOP exits even with step=0
data[SYSVAR_LEAVE_FLAG as usize..SYSVAR_LEAVE_FLAG as usize + 4]
.copy_from_slice(&1i32.to_le_bytes());
Ok(()) Ok(())
}, },
); );
@@ -2907,12 +2911,16 @@ impl ForthVM {
self.user_here = *cell.lock().unwrap(); self.user_here = *cell.lock().unwrap();
} }
let data = self.memory.data(&self.store); let data = self.memory.data(&self.store);
let mem_len = data.len() as u32;
let mem_here = u32::from_le_bytes( let mem_here = u32::from_le_bytes(
data[SYSVAR_HERE as usize..SYSVAR_HERE as usize + 4] data[SYSVAR_HERE as usize..SYSVAR_HERE as usize + 4]
.try_into() .try_into()
.unwrap(), .unwrap(),
); );
if mem_here > self.user_here { // Only accept mem_here if it's within valid memory bounds.
// A corrupted SYSVAR_HERE (e.g., from stack overflow into the sysvar area)
// would otherwise propagate as a garbage user_here.
if mem_here > self.user_here && mem_here < mem_len {
self.user_here = mem_here; self.user_here = mem_here;
if let Some(ref cell) = self.here_cell { if let Some(ref cell) = self.here_cell {
*cell.lock().unwrap() = mem_here; *cell.lock().unwrap() = mem_here;
@@ -3323,14 +3331,55 @@ impl ForthVM {
self.input_buffer = s; self.input_buffer = s;
self.input_pos = 0; self.input_pos = 0;
// Interpret // Sync input buffer, >IN, and #TIB to WASM (for SOURCE and WORD)
while let Some(token) = self.next_token() { {
self.interpret_token(&token)?; let bytes = self.input_buffer.as_bytes();
let len = bytes.len().min(INPUT_BUFFER_SIZE as usize);
let data = self.memory.data_mut(&mut self.store);
data[INPUT_BUFFER_BASE as usize..INPUT_BUFFER_BASE as usize + len]
.copy_from_slice(&bytes[..len]);
data[SYSVAR_TO_IN as usize..SYSVAR_TO_IN as usize + 4]
.copy_from_slice(&0u32.to_le_bytes());
data[SYSVAR_NUM_TIB as usize..SYSVAR_NUM_TIB as usize + 4]
.copy_from_slice(&(len as u32).to_le_bytes());
} }
// Restore input state // Interpret with >IN sync (supports >IN manipulation)
while let Some(token) = self.next_token() {
{
let data = self.memory.data_mut(&mut self.store);
data[SYSVAR_TO_IN as usize..SYSVAR_TO_IN as usize + 4]
.copy_from_slice(&(self.input_pos as u32).to_le_bytes());
}
let wasm_to_in_before = self.input_pos;
self.interpret_token(&token)?;
let data = self.memory.data(&self.store);
let b: [u8; 4] = data[SYSVAR_TO_IN as usize..SYSVAR_TO_IN as usize + 4]
.try_into()
.unwrap();
let wasm_to_in = u32::from_le_bytes(b) as usize;
if wasm_to_in != wasm_to_in_before {
self.input_pos = wasm_to_in;
}
if self.input_pos >= self.input_buffer.len() {
break;
}
}
// Restore input state and sync back to WASM
self.input_buffer = saved_buffer; self.input_buffer = saved_buffer;
self.input_pos = saved_pos; self.input_pos = saved_pos;
{
let bytes = self.input_buffer.as_bytes();
let len = bytes.len().min(INPUT_BUFFER_SIZE as usize);
let data = self.memory.data_mut(&mut self.store);
data[INPUT_BUFFER_BASE as usize..INPUT_BUFFER_BASE as usize + len]
.copy_from_slice(&bytes[..len]);
data[SYSVAR_TO_IN as usize..SYSVAR_TO_IN as usize + 4]
.copy_from_slice(&(self.input_pos as u32).to_le_bytes());
data[SYSVAR_NUM_TIB as usize..SYSVAR_NUM_TIB as usize + 4]
.copy_from_slice(&(len as u32).to_le_bytes());
}
Ok(()) Ok(())
} }
@@ -4204,13 +4253,15 @@ impl ForthVM {
.unwrap(); .unwrap();
let num_tib = u32::from_le_bytes(b); let num_tib = u32::from_le_bytes(b);
// Skip leading delimiters // Skip leading delimiters (also skip spaces when delimiter != space)
while to_in < num_tib { while to_in < num_tib {
let data = memory.data(&caller); let data = memory.data(&caller);
if data[(INPUT_BUFFER_BASE + to_in) as usize] != delim { let ch = data[(INPUT_BUFFER_BASE + to_in) as usize];
if ch == delim || (delim != b' ' && ch == b' ') {
to_in += 1;
} else {
break; break;
} }
to_in += 1;
} }
// Collect word // Collect word
@@ -4611,6 +4662,30 @@ impl ForthVM {
); );
self.register_host_primitive("REFILL", false, func)?; self.register_host_primitive("REFILL", false, func)?;
// ACCEPT ( c-addr +n1 -- +n2 ) receive up to +n1 characters.
// In non-interactive mode, return 0 (no input).
let memory = self.memory;
let dsp = self.dsp;
let func = Func::new(
&mut self.store,
FuncType::new(&self.engine, [], []),
move |mut caller, _params, _results| {
// Pop +n1 (max count) and c-addr from stack
let sp = dsp.get(&mut caller).unwrap_i32() as u32;
let new_sp = sp + CELL_SIZE; // pop +n1
let new_sp = new_sp + CELL_SIZE; // pop c-addr
// Push 0 (no characters received)
let result_sp = new_sp - CELL_SIZE;
let data = memory.data_mut(&mut caller);
data[result_sp as usize..result_sp as usize + 4]
.copy_from_slice(&0i32.to_le_bytes());
dsp.set(&mut caller, Val::I32(result_sp as i32))?;
Ok(())
},
);
self.register_host_primitive("ACCEPT", false, func)?;
Ok(()) Ok(())
} }
@@ -6657,6 +6732,22 @@ mod tests {
); );
} }
#[test]
fn test_plus_loop_leave_with_zero_step() {
// Regression: LEAVE inside +LOOP with step=0 caused infinite loop.
// LEAVE sets index=limit, but the XOR termination check yields 0 XOR 0 = 0
// (not negative), so the loop never exited without the leave flag.
let mut vm = ForthVM::new().unwrap();
vm.evaluate("VARIABLE INCRMNT VARIABLE ITERS").unwrap();
vm.evaluate(
": QD6 INCRMNT ! 0 ITERS ! ?DO 1 ITERS +! I ITERS @ 6 = IF LEAVE THEN INCRMNT @ +LOOP ITERS @ ;"
).unwrap();
vm.evaluate("-1 2 0 QD6").unwrap();
let stack = vm.data_stack();
// Expected: 2 2 2 2 2 2 6 (6 iterations of I=2, then ITERS@=6)
assert_eq!(stack, vec![6, 2, 2, 2, 2, 2, 2]);
}
// =================================================================== // ===================================================================
// New words: EVALUATE // New words: EVALUATE
// =================================================================== // ===================================================================