From 9905399edba006795328ee10cafbc1035c778a23 Mon Sep 17 00:00:00 2001 From: Oleksandr Kozachuk Date: Wed, 15 Apr 2026 19:49:51 +0200 Subject: [PATCH] =?UTF-8?q?boot:=20fix=20`S`=20interpret-mode=20=E2=80=94?= =?UTF-8?q?=20copy=20string=20out=20of=20TIB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `S name` in interpret mode used to leave (c-addr u) pointing into the input buffer, so the next REFILL clobbered the bytes. Typing `s test` then `type` on a fresh line printed "pest" because the new input overwrote the first chars of the old TIB content. Move `S` from boot.fth to the Rust outer interpreter alongside `S"` / `C"`: both interpret and compile modes now copy the token to HERE-space (stable across REFILL). Compile-mode output is still bit-identical to writing `S" name"` inline. Adds `test_s_interpret_survives_refill` regression. --- crates/core/boot.fth | 19 ++++------------- crates/core/src/outer.rs | 46 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/crates/core/boot.fth b/crates/core/boot.fth index c7b69ac..e078062 100644 --- a/crates/core/boot.fth +++ b/crates/core/boot.fth @@ -306,18 +306,7 @@ \ .S keeps its Rust host function (complex stack introspection). -\ --------------------------------------------------------------- -\ Phase 8: Parse-ahead sugar -\ --------------------------------------------------------------- - -\ S ( "name" -- ) ( -- c-addr u at run time ) -\ -\ State-smart "quote the next whitespace-delimited token as a string". -\ Interpret-mode: leave ( c-addr u ) pointing into the input buffer. -\ Compile-mode: append run-time semantics that push the copied string -\ (identical to writing S" name" inline). -\ -\ This is the string analogue of [CHAR] (for chars) and ['] (for xts). -\ Comparable to Lisp's quote: S foo is to a string what ' foo is to an xt. -\ Inside a : definition the compiled code is bit-identical to S" foo". -: S PARSE-NAME STATE @ IF SLITERAL THEN ; IMMEDIATE +\ S ( "name" -- c-addr u ) +\ State-smart string literal for the next whitespace-delimited token. +\ Handled in Rust (outer.rs interpret_token_immediate / compile_token) +\ so the string survives REFILL in interpret mode. diff --git a/crates/core/src/outer.rs b/crates/core/src/outer.rs index 8d030a7..20f502d 100644 --- a/crates/core/src/outer.rs +++ b/crates/core/src/outer.rs @@ -671,6 +671,23 @@ impl ForthVM { } return Ok(()); } + if token_upper == "S" { + // State-smart string literal for the next whitespace-delimited token. + // Interpret mode: copy token bytes to HERE-space (stable across REFILL), + // push ( c-addr u ). Compile-mode branch lives in compile_token. + if let Some(name) = self.next_token() { + self.refresh_user_here(); + let addr = self.user_here; + let bytes = name.as_bytes(); + let len = bytes.len() as u32; + self.rt.mem_write_slice(addr, bytes); + self.user_here += len; + self.sync_here_cell(); + self.push_data_stack(addr as i32)?; + self.push_data_stack(len as i32)?; + } + return Ok(()); + } if token_upper == "(" { // Comment -- skip until ) self.parse_until(')'); @@ -820,6 +837,23 @@ impl ForthVM { } return Ok(()); } + if token_upper == "S" { + // Compile-mode twin of the interpret-mode S handler: parse next + // whitespace-delimited token, copy into HERE, compile ( c-addr u ) + // literals. Bit-identical to writing S" name" inline. + if let Some(name) = self.next_token() { + self.refresh_user_here(); + let addr = self.user_here; + let bytes = name.as_bytes(); + let len = bytes.len() as u32; + self.rt.mem_write_slice(addr, bytes); + self.user_here += len; + self.sync_here_cell(); + self.push_ir(IrOp::PushI32(addr as i32)); + self.push_ir(IrOp::PushI32(len as i32)); + } + return Ok(()); + } if token_upper == "(" { self.parse_until(')'); return Ok(()); @@ -7580,6 +7614,18 @@ mod tests { assert_eq!(vm.take_output(), "kelvar"); } + #[test] + fn test_s_interpret_survives_refill() { + // Regression: `S name` in interpret mode used to return an address + // pointing into TIB, so the next REFILL clobbered the string. + let mut vm = ForthVM::::new().unwrap(); + vm.evaluate("S test").unwrap(); + vm.evaluate(".S").unwrap(); + vm.take_output(); + vm.evaluate("TYPE").unwrap(); + assert_eq!(vm.take_output(), "test"); + } + // =================================================================== // New words: COUNT // ===================================================================