boot: fix S interpret-mode — copy string out of TIB

`S name` in interpret mode used to leave (c-addr u) pointing into the
input buffer, so the next REFILL clobbered the bytes. Typing `s test`
then `type` on a fresh line printed "pest" because the new input
overwrote the first chars of the old TIB content.

Move `S` from boot.fth to the Rust outer interpreter alongside `S"` /
`C"`: both interpret and compile modes now copy the token to HERE-space
(stable across REFILL). Compile-mode output is still bit-identical to
writing `S" name"` inline.

Adds the `test_s_interpret_survives_refill` regression test.
This commit is contained in:
2026-04-15 19:49:51 +02:00
parent 1b8f4835d6
commit d1a7d55051
2 changed files with 50 additions and 15 deletions
+4 -15
View File
@@ -306,18 +306,7 @@
\ .S keeps its Rust host function (complex stack introspection).
\ ---------------------------------------------------------------
\ Phase 8: Parse-ahead sugar
\ ---------------------------------------------------------------
\ S ( "<spaces>name<space>" -- ) ( -- c-addr u at run time )
\
\ State-smart "quote the next whitespace-delimited token as a string".
\ Interpret-mode: leave ( c-addr u ) pointing into the input buffer.
\   NOTE: that string is transient. The next REFILL overwrites the input
\   buffer, so the bytes must be consumed or copied before more input
\   is read.
\ Compile-mode: append run-time semantics that push the copied string
\ (identical to writing S" name" inline).
\
\ This is the string analogue of [CHAR] (for chars) and ['] (for xts).
\ Comparable to Lisp's quote: S foo is to a string what ' foo is to an xt.
\ Inside a : definition the compiled code is bit-identical to S" foo".
\
\ PARSE-NAME supplies the token; SLITERAL runs only when STATE is non-zero.
: S PARSE-NAME STATE @ IF SLITERAL THEN ; IMMEDIATE
\ S ( "<spaces>name<space>" -- c-addr u )
\ State-smart string literal for the next whitespace-delimited token.
\ Handled in Rust (outer.rs interpret_token_immediate / compile_token)
\ so the string survives REFILL in interpret mode.
+46
View File
@@ -671,6 +671,23 @@ impl<R: Runtime> ForthVM<R> {
}
return Ok(());
}
if token_upper == "S" {
    // `S name`, interpret mode: quote the next whitespace-delimited token
    // as a string. The token bytes are copied to HERE-space so they stay
    // valid across REFILL, then ( c-addr u ) is pushed on the data stack.
    // The compile-mode counterpart lives in compile_token.
    // When no token follows, nothing is pushed.
    if let Some(word) = self.next_token() {
        let text = word.as_bytes();
        let count = text.len() as u32;
        // Bring user_here up to date before using it as the destination.
        self.refresh_user_here();
        let dest = self.user_here;
        self.rt.mem_write_slice(dest, text);
        // Step HERE past the copied bytes, then sync the HERE cell.
        self.user_here = dest + count;
        self.sync_here_cell();
        self.push_data_stack(dest as i32)?;
        self.push_data_stack(count as i32)?;
    }
    return Ok(());
}
if token_upper == "(" {
// Comment -- skip until )
self.parse_until(')');
@@ -820,6 +837,23 @@ impl<R: Runtime> ForthVM<R> {
}
return Ok(());
}
if token_upper == "S" {
    // `S name`, compile mode: counterpart of the interpret-mode handler.
    // The next whitespace-delimited token is copied to HERE-space and two
    // literals, ( c-addr u ), are compiled into the current definition.
    // Intended to match what writing S" name" inline produces.
    // When no token follows, nothing is compiled.
    if let Some(word) = self.next_token() {
        let text = word.as_bytes();
        let count = text.len() as u32;
        // Bring user_here up to date before using it as the destination.
        self.refresh_user_here();
        let dest = self.user_here;
        self.rt.mem_write_slice(dest, text);
        // Step HERE past the copied bytes, then sync the HERE cell.
        self.user_here = dest + count;
        self.sync_here_cell();
        self.push_ir(IrOp::PushI32(dest as i32));
        self.push_ir(IrOp::PushI32(count as i32));
    }
    return Ok(());
}
if token_upper == "(" {
self.parse_until(')');
return Ok(());
@@ -7580,6 +7614,18 @@ mod tests {
assert_eq!(vm.take_output(), "kelvar");
}
#[test]
fn test_s_interpret_survives_refill() {
    // Regression guard: interpret-mode `S name` once handed back an address
    // inside TIB, so the following REFILL overwrote the string's bytes.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();

    // Quote the token, then evaluate an unrelated line before using it.
    for line in ["S test", ".S"] {
        forth.evaluate(line).unwrap();
    }
    // Whatever was printed so far is irrelevant; drop it.
    let _ = forth.take_output();

    // The ( c-addr u ) pair left by `S test` must still describe "test".
    forth.evaluate("TYPE").unwrap();
    let printed = forth.take_output();
    assert_eq!(printed, "test");
}
// ===================================================================
// New words: COUNT
// ===================================================================