Release-mode benchmarks, UTIME word, consolidated promotion

Three changes:

1. Add UTIME host function ( -- ud ) for microsecond timing in Forth.
   Enables self-timed benchmarks matching gforth's utime approach.

2. Switch comparison benchmarks to release mode: builds wafer binary
   with --release, measures via UTIME (excludes startup overhead).
   Previously measured debug-mode Rust overhead, not WASM execution.

3. Add stack-to-local promotion to consolidated codegen path. Words
   that pass is_promotable now use the StackSim emit path even in
   CONSOLIDATE'd modules, preventing performance regression.

Release-mode results (ratios are WAFER time / gforth time, lower is better; WAFER beats gforth on 4/5 benchmarks):
  Factorial:    0.54x (2x faster)
  GCD:          0.50x (2x faster)
  NestedLoops:  0.10x (10x faster)
  Collatz:      0.31x (3x faster)
  Fibonacci:    1.47x (call overhead)
This commit is contained in:
2026-04-09 19:44:26 +02:00
parent 4cc71666d5
commit b1f7a5cc49
3 changed files with 146 additions and 33 deletions
+36
View File
@@ -2403,6 +2403,9 @@ impl ForthVM {
// UNUSED
self.register_unused()?;
// UTIME ( -- ud ) microseconds since epoch as double-cell
self.register_utime()?;
// HOLDS
// HOLDS: defined in boot.fth
@@ -5125,6 +5128,39 @@ impl ForthVM {
Ok(())
}
/// UTIME ( -- ud ) push microseconds since epoch as a double-cell value.
fn register_utime(&mut self) -> anyhow::Result<()> {
let memory = self.memory;
let dsp = self.dsp;
let func = Func::new(
&mut self.store,
FuncType::new(&self.engine, [], []),
move |mut caller, _params, _results| {
use std::time::{SystemTime, UNIX_EPOCH};
let us = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_micros() as u64;
let lo = us as i32;
let hi = (us >> 32) as i32;
// Push double: lo first (deeper), then hi on top
let sp = dsp.get(&mut caller).unwrap_i32() as u32;
let new_sp = sp - 2 * CELL_SIZE;
let data = memory.data_mut(&mut caller);
data[new_sp as usize..new_sp as usize + 4]
.copy_from_slice(&hi.to_le_bytes());
data[new_sp as usize + 4..new_sp as usize + 8]
.copy_from_slice(&lo.to_le_bytes());
dsp.set(&mut caller, Val::I32(new_sp as i32))?;
Ok(())
},
);
self.register_host_primitive("UTIME", false, func)?;
Ok(())
}
/// PARSE ( char "ccc<char>" -- c-addr u ) as inline host function.
fn register_parse_host(&mut self) -> anyhow::Result<()> {
let memory = self.memory;