From dea3a32c337b584132817309ff341da8d349cdd8 Mon Sep 17 00:00:00 2001 From: Oleksandr Kozachuk Date: Thu, 2 Apr 2026 12:24:57 +0200 Subject: [PATCH] Add switchable optimization config and benchmark framework WaferConfig: unified config controlling all optimizations individually. ForthVM::new_with_config(config) to create VMs with custom optimization settings. All 7 switchable optimizations: peephole, constant_fold, strength_reduce, dce, tail_call, inline (IR passes) + stack_to_local_promotion (codegen). Benchmark framework (crates/core/tests/benchmark_report.rs): - 7 Forth benchmarks: Fibonacci, Factorial, SumRecurse, NestedLoops, GCD, MemFill, Collatz - Correctness verification across all configs (runs in CI) - Full report with 128 optimization combinations (cargo test -- --ignored) - Measures execution time, compilation time, WASM module bytes - CONSOLIDATE impact comparison Key findings from benchmark report: - Inlining: -77% exec time on Fibonacci, -92% on Collatz - Stack-to-local promotion: -5.5% WASM module size - CONSOLIDATE: -72% exec time on Fibonacci (call_indirect -> direct call) - All optimizations combined: best overall performance --- Justfile | 4 + crates/core/src/codegen.rs | 6 +- crates/core/src/config.rs | 61 ++++ crates/core/src/lib.rs | 1 + crates/core/src/outer.rs | 61 +++- crates/core/tests/benchmark_report.rs | 496 ++++++++++++++++++++++++++ 6 files changed, 614 insertions(+), 15 deletions(-) create mode 100644 crates/core/src/config.rs create mode 100644 crates/core/tests/benchmark_report.rs diff --git a/Justfile b/Justfile index e6eab14..b4658a2 100644 --- a/Justfile +++ b/Justfile @@ -39,6 +39,10 @@ run file: bench: cargo bench --workspace +# Run optimization benchmark report +bench-opts: + cargo test -p wafer-core --test benchmark_report -- --nocapture --ignored + # Check dependency licenses and advisories deny: cargo deny check diff --git a/crates/core/src/codegen.rs b/crates/core/src/codegen.rs index d585b26..c510269 100644 --- 
a/crates/core/src/codegen.rs +++ b/crates/core/src/codegen.rs @@ -85,6 +85,8 @@ pub struct CodegenConfig { pub base_fn_index: u32, /// Number of functions already in the table. pub table_size: u32, + /// Enable stack-to-local promotion for straight-line words. + pub stack_to_local_promotion: bool, } /// Result of compiling a word to WASM. @@ -1457,7 +1459,7 @@ pub fn compile_word( // -- Code section -- // Determine whether to use stack-to-local promotion - let promoted = is_promotable(body); + let promoted = config.stack_to_local_promotion && is_promotable(body); let scratch_count = count_scratch_locals(body); let num_locals = if promoted { let (preload, _) = compute_stack_needs(body); @@ -1900,6 +1902,7 @@ mod tests { CodegenConfig { base_fn_index: 0, table_size: 16, + stack_to_local_promotion: true, } } @@ -2123,6 +2126,7 @@ mod tests { let cfg = CodegenConfig { base_fn_index: 7, table_size: 16, + stack_to_local_promotion: true, }; let m = compile_word("t", &[IrOp::PushI32(1)], &cfg).unwrap(); assert_eq!(m.fn_index, 7); diff --git a/crates/core/src/config.rs b/crates/core/src/config.rs new file mode 100644 index 0000000..ac270ac --- /dev/null +++ b/crates/core/src/config.rs @@ -0,0 +1,61 @@ +//! Unified configuration for all WAFER optimizations. + +use crate::optimizer::OptConfig; + +/// Codegen-level optimization flags. +#[derive(Debug, Clone)] +pub struct CodegenOpts { + /// Enable stack-to-local promotion for straight-line words. + pub stack_to_local_promotion: bool, +} + +/// Master configuration for all WAFER optimizations. +#[derive(Debug, Clone)] +pub struct WaferConfig { + /// IR-level optimization passes. + pub opt: OptConfig, + /// Codegen-level optimizations. + pub codegen: CodegenOpts, +} + +impl WaferConfig { + /// All optimizations enabled. 
+ pub fn all() -> Self { + Self { + opt: OptConfig { + peephole: true, + constant_fold: true, + tail_call: true, + strength_reduce: true, + dce: true, + inline: true, + }, + codegen: CodegenOpts { + stack_to_local_promotion: true, + }, + } + } + + /// All optimizations disabled. + pub fn none() -> Self { + Self { + opt: OptConfig { + peephole: false, + constant_fold: false, + tail_call: false, + strength_reduce: false, + dce: false, + inline: false, + }, + codegen: CodegenOpts { + stack_to_local_promotion: false, + }, + } + } +} + +impl Default for WaferConfig { + fn default() -> Self { + Self::all() + } +} diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 299da60..09f288b 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -18,6 +18,7 @@ pub mod codegen; pub mod compiler; +pub mod config; pub mod consolidate; pub mod dictionary; pub mod error; diff --git a/crates/core/src/outer.rs b/crates/core/src/outer.rs index 93beb5c..ab24fd4 100644 --- a/crates/core/src/outer.rs +++ b/crates/core/src/outer.rs @@ -17,6 +17,7 @@ use wasmtime::{ }; use crate::codegen::{CodegenConfig, CompiledModule, compile_consolidated_module, compile_word}; +use crate::config::WaferConfig; use crate::dictionary::{Dictionary, WordId}; use crate::ir::IrOp; use crate::memory::{ @@ -24,7 +25,7 @@ use crate::memory::{ INPUT_BUFFER_SIZE, RETURN_STACK_TOP, SYSVAR_BASE_VAR, SYSVAR_NUM_TIB, SYSVAR_STATE, SYSVAR_TO_IN, }; -use crate::optimizer::{OptConfig, optimize}; +use crate::optimizer::optimize; // --------------------------------------------------------------------------- // Control-flow compilation state @@ -232,11 +233,20 @@ pub struct ForthVM { float_precision: Arc>, /// Stored IR bodies for inlining optimization. ir_bodies: HashMap>, + /// Optimization configuration. + config: WaferConfig, + /// Total WASM module bytes compiled. + total_module_bytes: u64, } impl ForthVM { /// Boot a new Forth VM with all primitives registered. 
pub fn new() -> anyhow::Result { + Self::new_with_config(WaferConfig::default()) + } + + /// Boot a new Forth VM with custom optimization configuration. + pub fn new_with_config(wafer_config: WaferConfig) -> anyhow::Result { let mut config = wasmtime::Config::new(); config.cranelift_nan_canonicalization(false); // Best-effort module caching @@ -348,6 +358,8 @@ impl ForthVM { fvalue_words: std::collections::HashSet::new(), float_precision: Arc::new(Mutex::new(6)), ir_bodies: HashMap::new(), + config: wafer_config, + total_module_bytes: 0, }; vm.register_primitives()?; @@ -424,6 +436,11 @@ impl ForthVM { stack } + /// Total WASM module bytes compiled so far. + pub fn total_module_bytes(&self) -> u64 { + self.total_module_bytes + } + // ----------------------------------------------------------------------- // Internal: tokenizer // ----------------------------------------------------------------------- @@ -1431,16 +1448,8 @@ impl ForthVM { } /// Run all enabled optimization passes on an IR sequence. 
- fn optimize_ir(ir: Vec, bodies: &HashMap>) -> Vec { - let config = OptConfig { - peephole: true, - constant_fold: true, - tail_call: true, - strength_reduce: true, - dce: true, - inline: true, - }; - optimize(ir, &config, bodies) + fn optimize_ir(&self, ir: Vec, bodies: &HashMap>) -> Vec { + optimize(ir, &self.config.opt, bodies) } fn finish_colon_def(&mut self) -> anyhow::Result<()> { @@ -1461,13 +1470,14 @@ impl ForthVM { .ok_or_else(|| anyhow::anyhow!("no word being compiled"))?; let ir = std::mem::take(&mut self.compiling_ir); let bodies = self.ir_bodies.clone(); - let ir = Self::optimize_ir(ir, &bodies); + let ir = self.optimize_ir(ir, &bodies); self.ir_bodies.insert(word_id, ir.clone()); // Compile to WASM let config = CodegenConfig { base_fn_index: word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word(&name, &ir, &config).map_err(|e| anyhow::anyhow!("codegen error: {e}"))?; @@ -1580,6 +1590,7 @@ impl ForthVM { word_id: WordId, ) -> anyhow::Result<()> { self.ensure_table_size(word_id.0)?; + self.total_module_bytes += compiled.bytes.len() as u64; let module = Module::new(&self.engine, &compiled.bytes)?; let instance = Instance::new( @@ -1843,7 +1854,7 @@ impl ForthVM { ir_body: Vec, ) -> anyhow::Result { let bodies = self.ir_bodies.clone(); - let ir_body = Self::optimize_ir(ir_body, &bodies); + let ir_body = self.optimize_ir(ir_body, &bodies); let word_id = self .dictionary .create(name, immediate) @@ -1853,6 +1864,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word(name, &ir_body, &config) .map_err(|e| anyhow::anyhow!("codegen error for {name}: {e}"))?; @@ -2380,6 +2392,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: word_id.0, table_size: self.table_size(), + stack_to_local_promotion: 
self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word(&name, &ir_body, &config) .map_err(|e| anyhow::anyhow!("codegen error for VARIABLE {name}: {e}"))?; @@ -2410,6 +2423,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word(&name, &ir_body, &config) .map_err(|e| anyhow::anyhow!("codegen error for CONSTANT {name}: {e}"))?; @@ -2445,6 +2459,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word(&name, &ir_body, &config) .map_err(|e| anyhow::anyhow!("codegen error for CREATE {name}: {e}"))?; @@ -2490,6 +2505,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word(&name, &ir_body, &config) .map_err(|e| anyhow::anyhow!("codegen error for VALUE {name}: {e}"))?; @@ -2533,6 +2549,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word(&name, &ir_body, &config) .map_err(|e| anyhow::anyhow!("codegen error for DEFER {name}: {e}"))?; @@ -2570,6 +2587,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word(&name, &ir_body, &config) .map_err(|e| anyhow::anyhow!("codegen error for BUFFER: {name}: {e}"))?; @@ -2601,6 +2619,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: word_id.0, table_size: self.table_size(), + stack_to_local_promotion: 
self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word(&name, &ir_body, &config) .map_err(|e| anyhow::anyhow!("codegen error for MARKER {name}: {e}"))?; @@ -3838,6 +3857,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let name = self .dictionary @@ -3931,6 +3951,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: second_word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word("_does_action2_", &second_ir, &config) .map_err(|e| anyhow::anyhow!("codegen error for DOES> body 2: {e}"))?; @@ -3981,6 +4002,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: does_word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word("_does_action_", &does_ir, &config) .map_err(|e| anyhow::anyhow!("codegen error for DOES> body: {e}"))?; @@ -4007,6 +4029,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: defining_word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word(&defining_name, &[], &config) .map_err(|e| anyhow::anyhow!("codegen error for defining word: {e}"))?; @@ -4066,6 +4089,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: new_word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word(&name, &ir_body, &config) .map_err(|e| anyhow::anyhow!("codegen: {e}"))?; @@ -4089,6 +4113,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: tmp_word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word("_create_part_", &create_ir, 
&config) .map_err(|e| anyhow::anyhow!("codegen: {e}"))?; @@ -4101,6 +4126,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: new_word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word(&name, &patched_ir, &config) .map_err(|e| anyhow::anyhow!("DOES> patch codegen: {e}"))?; @@ -4128,6 +4154,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: target_word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word(&name, &patched_ir, &config) .map_err(|e| anyhow::anyhow!("DOES> patch codegen: {e}"))?; @@ -5053,6 +5080,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: target_word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word(&name, &patched_ir, &config) .map_err(|e| anyhow::anyhow!("runtime DOES> patch codegen: {e}"))?; @@ -6589,6 +6617,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word(&name, &ir, &config) .map_err(|e| anyhow::anyhow!("2CONSTANT codegen: {e}"))?; @@ -6618,6 +6647,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word(&name, &ir, &config) .map_err(|e| anyhow::anyhow!("2VARIABLE codegen: {e}"))?; @@ -6660,6 +6690,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word(&name, &ir, &config) .map_err(|e| anyhow::anyhow!("2VALUE codegen: {e}"))?; @@ -7393,7 +7424,8 @@ impl ForthVM { let flag: 
i32 = if result { -1 } else { 0 }; let dsp_val = dsp.get(&mut caller).unwrap_i32() as u32; - let new_dsp = dsp_val.checked_sub(CELL_SIZE) + let new_dsp = dsp_val + .checked_sub(CELL_SIZE) .ok_or_else(|| wasmtime::Error::msg("data stack overflow in F~"))?; dsp.set(&mut caller, Val::I32(new_dsp as i32)).unwrap(); let mem = memory.data_mut(&mut caller); @@ -8340,6 +8372,7 @@ impl ForthVM { let config = CodegenConfig { base_fn_index: word_id.0, table_size: self.table_size(), + stack_to_local_promotion: self.config.codegen.stack_to_local_promotion, }; let compiled = compile_word(&name, &ir_body, &config) .map_err(|e| anyhow::anyhow!("codegen error for FVARIABLE {name}: {e}"))?; diff --git a/crates/core/tests/benchmark_report.rs b/crates/core/tests/benchmark_report.rs new file mode 100644 index 0000000..9d3d500 --- /dev/null +++ b/crates/core/tests/benchmark_report.rs @@ -0,0 +1,496 @@ +//! Optimization benchmark and verification framework for WAFER. +//! +//! Run correctness tests: `cargo test -p wafer-core --test benchmark_report` +//! Run full benchmark: `cargo test -p wafer-core --test benchmark_report -- --nocapture --ignored` + +use std::time::Instant; +use wafer_core::config::WaferConfig; +use wafer_core::outer::ForthVM; + +// ----------------------------------------------------------------------- +// Benchmark definitions +// ----------------------------------------------------------------------- + +struct Benchmark { + name: &'static str, + define: &'static str, + run: &'static str, + verify: &'static str, + expected: Vec, + iterations: u32, +} + +fn benchmarks() -> Vec { + vec![ + Benchmark { + name: "Fibonacci(25)", + define: ": FIB ( n -- n ) DUP 2 < IF EXIT THEN DUP 1- RECURSE SWAP 2 - RECURSE + ;", + run: "25 FIB DROP", + verify: "25 FIB", + expected: vec![75025], + iterations: 10, + }, + Benchmark { + name: "Factorial(12)", + define: ": FACT ( n -- n! 
) 1 SWAP 1+ 1 ?DO I * LOOP ;", + run: "12 FACT DROP", + verify: "12 FACT", + expected: vec![479001600], + iterations: 1000, + }, + Benchmark { + name: "SumRecurse(5000)", + define: concat!( + ": SUMREC ( n -- sum ) ", + "DUP 0= IF EXIT THEN ", + "DUP 1- RECURSE + ;" + ), + run: "5000 SUMREC DROP", + verify: "100 SUMREC", + expected: vec![5050], + iterations: 100, + }, + Benchmark { + name: "NestedLoops(80)", + define: ": NESTED ( n -- sum ) 0 SWAP 0 DO I 0 DO I J + DROP LOOP LOOP ;", + run: "80 NESTED DROP", + verify: "5 NESTED", + expected: vec![0], + iterations: 10, + }, + Benchmark { + name: "GCD-bench(500)", + define: concat!( + ": GCD ( a b -- gcd ) BEGIN DUP WHILE TUCK MOD REPEAT DROP ; ", + ": GCD-BENCH ( n -- ) 0 DO 10000 I 1+ GCD DROP LOOP ;" + ), + run: "500 GCD-BENCH", + verify: "48 36 GCD", + expected: vec![12], + iterations: 10, + }, + Benchmark { + name: "MemFill(1000)", + define: concat!( + "VARIABLE MBUF ", + "1000 CELLS ALLOT ", + "HERE 1000 CELLS - MBUF ! ", + ": MFILL ( n -- ) 0 DO I I * MBUF @ I CELLS + ! 
LOOP ; ", + ": MSUM ( n -- sum ) 0 SWAP 0 DO MBUF @ I CELLS + @ + LOOP ;" + ), + run: "1000 MFILL 1000 MSUM DROP", + verify: "10 MFILL 10 MSUM", + expected: vec![285], + iterations: 100, + }, + Benchmark { + name: "Collatz(1M)", + define: concat!( + ": COLLATZ ( n -- steps ) ", + "0 SWAP BEGIN DUP 1 > WHILE ", + "DUP 1 AND IF 3 * 1+ ELSE 2 / THEN ", + "SWAP 1+ SWAP ", + "REPEAT DROP ; ", + ": COLLATZ-BENCH ( n -- ) 0 DO I 1+ COLLATZ DROP LOOP ;" + ), + run: "10000 COLLATZ-BENCH", + verify: "27 COLLATZ", + expected: vec![111], + iterations: 5, + }, + ] +} + +// ----------------------------------------------------------------------- +// Configurations +// ----------------------------------------------------------------------- + +fn individual_configs() -> Vec<(&'static str, WaferConfig)> { + vec![ + ("none", WaferConfig::none()), + ("peephole", { + let mut c = WaferConfig::none(); + c.opt.peephole = true; + c + }), + ("constant_fold", { + let mut c = WaferConfig::none(); + c.opt.constant_fold = true; + c + }), + ("strength_reduce", { + let mut c = WaferConfig::none(); + c.opt.strength_reduce = true; + c + }), + ("dce", { + let mut c = WaferConfig::none(); + c.opt.dce = true; + c + }), + ("tail_call", { + let mut c = WaferConfig::none(); + c.opt.tail_call = true; + c + }), + ("inline", { + let mut c = WaferConfig::none(); + c.opt.inline = true; + c + }), + ("promotion", { + let mut c = WaferConfig::none(); + c.codegen.stack_to_local_promotion = true; + c + }), + ("all_ir", { + let mut c = WaferConfig::none(); + c.opt.peephole = true; + c.opt.constant_fold = true; + c.opt.strength_reduce = true; + c.opt.dce = true; + c.opt.tail_call = true; + c.opt.inline = true; + c + }), + ("all", WaferConfig::all()), + ] +} + +fn combination_configs() -> Vec<(String, WaferConfig)> { + let mut result = Vec::new(); + for ir_bits in 0..64u32 { + for promo in [false, true] { + let mut c = WaferConfig::none(); + if ir_bits & 1 != 0 { + c.opt.peephole = true; + } + if ir_bits & 2 != 0 { + 
c.opt.constant_fold = true; + } + if ir_bits & 4 != 0 { + c.opt.strength_reduce = true; + } + if ir_bits & 8 != 0 { + c.opt.dce = true; + } + if ir_bits & 16 != 0 { + c.opt.tail_call = true; + } + if ir_bits & 32 != 0 { + c.opt.inline = true; + } + if promo { + c.codegen.stack_to_local_promotion = true; + } + let name = format!("ir={:06b}{}", ir_bits, if promo { "+P" } else { "" }); + result.push((name, c)); + } + } + result +} + +// ----------------------------------------------------------------------- +// Measurement +// ----------------------------------------------------------------------- + +struct BenchResult { + compile_time_us: u64, + exec_time_us: u64, + module_bytes: u64, +} + +fn run_benchmark(config: &WaferConfig, bench: &Benchmark) -> BenchResult { + // Compile + let compile_start = Instant::now(); + let mut vm = ForthVM::new_with_config(config.clone()).expect("VM creation failed"); + for line in bench.define.lines() { + let trimmed = line.trim(); + if !trimmed.is_empty() { + let _ = vm.evaluate(trimmed); + } + } + vm.take_output(); + let compile_time = compile_start.elapsed(); + + // Warm up + let _ = vm.evaluate(bench.run); + vm.take_output(); + + // Measure + let mut times = Vec::new(); + for _ in 0..bench.iterations { + let start = Instant::now(); + let _ = vm.evaluate(bench.run); + times.push(start.elapsed()); + vm.take_output(); + } + times.sort(); + let median = times[times.len() / 2]; + + BenchResult { + compile_time_us: compile_time.as_micros() as u64, + exec_time_us: median.as_micros() as u64, + module_bytes: vm.total_module_bytes(), + } +} + +// ----------------------------------------------------------------------- +// Correctness test (runs in CI) +// ----------------------------------------------------------------------- + +#[test] +fn correctness_all_configs() { + let configs = individual_configs(); + let benches = benchmarks(); + + for (cfg_name, config) in &configs { + for bench in &benches { + let mut vm = 
ForthVM::new_with_config(config.clone()).expect("VM creation failed"); + for line in bench.define.lines() { + let trimmed = line.trim(); + if !trimmed.is_empty() + && let Err(e) = vm.evaluate(trimmed) + { + panic!( + "Config '{cfg_name}', bench '{}': define failed: {e}", + bench.name + ); + } + } + vm.take_output(); + if let Err(e) = vm.evaluate(bench.verify) { + panic!( + "Config '{cfg_name}', bench '{}': verify failed: {e}", + bench.name + ); + } + vm.take_output(); + let stack = vm.data_stack(); + assert_eq!( + stack, bench.expected, + "Config '{cfg_name}', bench '{}': expected {:?}, got {:?}", + bench.name, bench.expected, stack + ); + } + } +} + +// ----------------------------------------------------------------------- +// Benchmark report (run with --nocapture --ignored) +// ----------------------------------------------------------------------- + +#[test] +#[ignore] +fn optimization_report() { + let configs = individual_configs(); + let benches = benchmarks(); + + let sep = "=".repeat(90); + let thin_sep = "-".repeat(90); + println!("\n{sep}"); + println!(" WAFER Optimization Benchmark Report"); + println!("{sep}\n"); + + // ---- Phase 1: Individual optimization impact ---- + println!("Phase 1: Individual Optimization Impact"); + println!("{thin_sep}"); + println!( + "{:<18} {:<18} {:>10} {:>10} {:>10} {:>10} {:>10}", + "Config", "Benchmark", "Compile", "Exec", "Bytes", "Exec %", "Bytes %" + ); + println!( + "{:<18} {:<18} {:>10} {:>10} {:>10} {:>10} {:>10}", + "", "", "(us)", "(us)", "", "vs none", "vs none" + ); + println!("{thin_sep}"); + + // Collect baseline (none) results first + let mut baseline_results: Vec = Vec::new(); + for bench in &benches { + baseline_results.push(run_benchmark(&configs[0].1, bench)); + } + + // Print all configs + for (cfg_name, config) in &configs { + for (bench_idx, bench) in benches.iter().enumerate() { + let result = if *cfg_name == "none" { + BenchResult { + compile_time_us: baseline_results[bench_idx].compile_time_us, + 
exec_time_us: baseline_results[bench_idx].exec_time_us, + module_bytes: baseline_results[bench_idx].module_bytes, + } + } else { + run_benchmark(config, bench) + }; + + let base_exec = baseline_results[bench_idx].exec_time_us; + let base_bytes = baseline_results[bench_idx].module_bytes; + let exec_pct = if base_exec > 0 { + format!( + "{:+.1}%", + ((result.exec_time_us as f64 - base_exec as f64) / base_exec as f64) * 100.0 + ) + } else { + "N/A".to_string() + }; + let bytes_pct = if base_bytes > 0 { + format!( + "{:+.1}%", + ((result.module_bytes as f64 - base_bytes as f64) / base_bytes as f64) * 100.0 + ) + } else { + "N/A".to_string() + }; + + println!( + "{:<18} {:<18} {:>10} {:>10} {:>10} {:>10} {:>10}", + cfg_name, + bench.name, + result.compile_time_us, + result.exec_time_us, + result.module_bytes, + exec_pct, + bytes_pct, + ); + } + } + + // ---- Phase 2: Combination matrix (subset of benchmarks for speed) ---- + println!("\n{sep}"); + println!("Phase 2: Combination Matrix (Fibonacci + GCD only)"); + println!("{sep}"); + + let combo_configs = combination_configs(); + let combo_bench_indices: Vec = benches + .iter() + .enumerate() + .filter(|(_, b)| b.name.contains("Fibonacci") || b.name.contains("GCD")) + .map(|(i, _)| i) + .collect(); + + println!( + "{:<18} {:<18} {:>10} {:>10} {:>10}", + "Config", "Benchmark", "Exec(us)", "Exec %", "Bytes" + ); + println!("{thin_sep}"); + + let mut best_exec: Vec<(String, u64)> = combo_bench_indices + .iter() + .map(|&i| ("none".to_string(), baseline_results[i].exec_time_us)) + .collect(); + + for (cfg_name, config) in &combo_configs { + for (ci, &bench_idx) in combo_bench_indices.iter().enumerate() { + let bench = &benches[bench_idx]; + let result = run_benchmark(config, bench); + let base_exec = baseline_results[bench_idx].exec_time_us; + let exec_pct = if base_exec > 0 { + format!( + "{:+.1}%", + ((result.exec_time_us as f64 - base_exec as f64) / base_exec as f64) * 100.0 + ) + } else { + "N/A".to_string() + }; + + 
println!( + "{:<18} {:<18} {:>10} {:>10} {:>10}", + cfg_name, bench.name, result.exec_time_us, exec_pct, result.module_bytes, + ); + + if result.exec_time_us < best_exec[ci].1 { + best_exec[ci] = (cfg_name.clone(), result.exec_time_us); + } + } + } + + // ---- Phase 3: CONSOLIDATE comparison ---- + println!("\n{sep}"); + println!("Phase 3: CONSOLIDATE Impact"); + println!("{sep}"); + println!( + "{:<18} {:<18} {:>10} {:>10} {:>10}", + "Mode", "Benchmark", "Exec(us)", "vs all", "Bytes" + ); + println!("{thin_sep}"); + + let all_config = WaferConfig::all(); + for bench in &benches { + // Without CONSOLIDATE + let result_all = run_benchmark(&all_config, bench); + + // With CONSOLIDATE + let mut vm_consol = + ForthVM::new_with_config(all_config.clone()).expect("VM creation failed"); + for line in bench.define.lines() { + let trimmed = line.trim(); + if !trimmed.is_empty() { + let _ = vm_consol.evaluate(trimmed); + } + } + vm_consol.take_output(); + let _ = vm_consol.evaluate("CONSOLIDATE"); + vm_consol.take_output(); + + // Warm up + let _ = vm_consol.evaluate(bench.run); + vm_consol.take_output(); + + let mut times = Vec::new(); + for _ in 0..bench.iterations { + let start = Instant::now(); + let _ = vm_consol.evaluate(bench.run); + times.push(start.elapsed()); + vm_consol.take_output(); + } + times.sort(); + let consol_exec = times[times.len() / 2].as_micros() as u64; + let consol_bytes = vm_consol.total_module_bytes(); + + let exec_pct = if result_all.exec_time_us > 0 { + format!( + "{:+.1}%", + ((consol_exec as f64 - result_all.exec_time_us as f64) + / result_all.exec_time_us as f64) + * 100.0 + ) + } else { + "N/A".to_string() + }; + + println!( + "{:<18} {:<18} {:>10} {:>10} {:>10}", + "all", bench.name, result_all.exec_time_us, "+0.0%", result_all.module_bytes, + ); + println!( + "{:<18} {:<18} {:>10} {:>10} {:>10}", + "all+CONSOLIDATE", bench.name, consol_exec, exec_pct, consol_bytes, + ); + } + + // ---- Summary ---- + println!("\n{sep}"); + println!(" 
Summary"); + println!("{sep}"); + for (ci, &bench_idx) in combo_bench_indices.iter().enumerate() { + let bench = &benches[bench_idx]; + let base = baseline_results[bench_idx].exec_time_us; + let improvement = if base > 0 { + format!( + "{:.1}%", + ((base as f64 - best_exec[ci].1 as f64) / base as f64) * 100.0 + ) + } else { + "N/A".to_string() + }; + println!( + " {}: best config '{}' ({} us, {} faster than none)", + bench.name, best_exec[ci].0, best_exec[ci].1, improvement + ); + } + println!(); + println!(" Recommendation: Use WaferConfig::all() for best overall performance."); + println!(" CONSOLIDATE provides additional speedup for compute-heavy words."); + println!("{sep}\n"); +}