Add switchable optimization config and benchmark framework
WaferConfig: unified config controlling all optimizations individually. ForthVM::new_with_config(config) to create VMs with custom optimization settings. All 7 switchable optimizations: peephole, constant_fold, strength_reduce, dce, tail_call, inline (IR passes) + stack_to_local_promotion (codegen). Benchmark framework (crates/core/tests/benchmark_report.rs): - 7 Forth benchmarks: Fibonacci, Factorial, SumRecurse, NestedLoops, GCD, MemFill, Collatz - Correctness verification across all configs (runs in CI) - Full report with 128 optimization combinations (cargo test -- --ignored) - Measures execution time, compilation time, WASM module bytes - CONSOLIDATE impact comparison Key findings from benchmark report: - Inlining: -77% exec time on Fibonacci, -92% on Collatz - Stack-to-local promotion: -5.5% WASM module size - CONSOLIDATE: -72% exec time on Fibonacci (call_indirect -> direct call) - All optimizations combined: best overall performance
This commit is contained in:
@@ -39,6 +39,10 @@ run file:
|
||||
bench:
|
||||
cargo bench --workspace
|
||||
|
||||
# Run optimization benchmark report
|
||||
bench-opts:
|
||||
cargo test -p wafer-core --test benchmark_report -- --nocapture --ignored
|
||||
|
||||
# Check dependency licenses and advisories
|
||||
deny:
|
||||
cargo deny check
|
||||
|
||||
@@ -85,6 +85,8 @@ pub struct CodegenConfig {
|
||||
pub base_fn_index: u32,
|
||||
/// Number of functions already in the table.
|
||||
pub table_size: u32,
|
||||
/// Enable stack-to-local promotion for straight-line words.
|
||||
pub stack_to_local_promotion: bool,
|
||||
}
|
||||
|
||||
/// Result of compiling a word to WASM.
|
||||
@@ -1457,7 +1459,7 @@ pub fn compile_word(
|
||||
|
||||
// -- Code section --
|
||||
// Determine whether to use stack-to-local promotion
|
||||
let promoted = is_promotable(body);
|
||||
let promoted = config.stack_to_local_promotion && is_promotable(body);
|
||||
let scratch_count = count_scratch_locals(body);
|
||||
let num_locals = if promoted {
|
||||
let (preload, _) = compute_stack_needs(body);
|
||||
@@ -1900,6 +1902,7 @@ mod tests {
|
||||
CodegenConfig {
|
||||
base_fn_index: 0,
|
||||
table_size: 16,
|
||||
stack_to_local_promotion: true,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2123,6 +2126,7 @@ mod tests {
|
||||
let cfg = CodegenConfig {
|
||||
base_fn_index: 7,
|
||||
table_size: 16,
|
||||
stack_to_local_promotion: true,
|
||||
};
|
||||
let m = compile_word("t", &[IrOp::PushI32(1)], &cfg).unwrap();
|
||||
assert_eq!(m.fn_index, 7);
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
//! Unified configuration for all WAFER optimizations.
|
||||
|
||||
use crate::optimizer::OptConfig;
|
||||
|
||||
/// Codegen-level optimization flags.
///
/// These switches are forwarded to the WASM code generator, as opposed to the
/// IR-level passes that are configured through `OptConfig`.
///
/// The type is a plain bundle of boolean flags, so it is cheap to copy and can
/// be compared for equality.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CodegenOpts {
    /// Enable stack-to-local promotion for straight-line words.
    pub stack_to_local_promotion: bool,
}
|
||||
|
||||
/// Master configuration for all WAFER optimizations.
///
/// Groups the IR-level pass switches (`opt`) with the codegen-level switches
/// (`codegen`) so that a VM can be configured through a single value.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct WaferConfig {
|
||||
/// IR-level optimization passes.
|
||||
pub opt: OptConfig,
|
||||
/// Codegen-level optimizations.
|
||||
pub codegen: CodegenOpts,
|
||||
}
|
||||
|
||||
impl WaferConfig {
|
||||
/// All optimizations enabled.
|
||||
pub fn all() -> Self {
|
||||
Self {
|
||||
opt: OptConfig {
|
||||
peephole: true,
|
||||
constant_fold: true,
|
||||
tail_call: true,
|
||||
strength_reduce: true,
|
||||
dce: true,
|
||||
inline: true,
|
||||
},
|
||||
codegen: CodegenOpts {
|
||||
stack_to_local_promotion: true,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// All optimizations disabled.
|
||||
pub fn none() -> Self {
|
||||
Self {
|
||||
opt: OptConfig {
|
||||
peephole: false,
|
||||
constant_fold: false,
|
||||
tail_call: false,
|
||||
strength_reduce: false,
|
||||
dce: false,
|
||||
inline: false,
|
||||
},
|
||||
codegen: CodegenOpts {
|
||||
stack_to_local_promotion: false,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for WaferConfig {
|
||||
fn default() -> Self {
|
||||
Self::all()
|
||||
}
|
||||
}
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
pub mod codegen;
|
||||
pub mod compiler;
|
||||
pub mod config;
|
||||
pub mod consolidate;
|
||||
pub mod dictionary;
|
||||
pub mod error;
|
||||
|
||||
+47
-14
@@ -17,6 +17,7 @@ use wasmtime::{
|
||||
};
|
||||
|
||||
use crate::codegen::{CodegenConfig, CompiledModule, compile_consolidated_module, compile_word};
|
||||
use crate::config::WaferConfig;
|
||||
use crate::dictionary::{Dictionary, WordId};
|
||||
use crate::ir::IrOp;
|
||||
use crate::memory::{
|
||||
@@ -24,7 +25,7 @@ use crate::memory::{
|
||||
INPUT_BUFFER_SIZE, RETURN_STACK_TOP, SYSVAR_BASE_VAR, SYSVAR_NUM_TIB, SYSVAR_STATE,
|
||||
SYSVAR_TO_IN,
|
||||
};
|
||||
use crate::optimizer::{OptConfig, optimize};
|
||||
use crate::optimizer::optimize;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Control-flow compilation state
|
||||
@@ -232,11 +233,20 @@ pub struct ForthVM {
|
||||
float_precision: Arc<Mutex<usize>>,
|
||||
/// Stored IR bodies for inlining optimization.
|
||||
ir_bodies: HashMap<WordId, Vec<IrOp>>,
|
||||
/// Optimization configuration.
|
||||
config: WaferConfig,
|
||||
/// Total WASM module bytes compiled.
|
||||
total_module_bytes: u64,
|
||||
}
|
||||
|
||||
impl ForthVM {
|
||||
/// Boot a new Forth VM with all primitives registered.
///
/// Equivalent to calling `new_with_config` with `WaferConfig::default()`;
/// use `new_with_config` directly to choose which optimizations are enabled.
|
||||
pub fn new() -> anyhow::Result<Self> {
|
||||
Self::new_with_config(WaferConfig::default())
|
||||
}
|
||||
|
||||
/// Boot a new Forth VM with custom optimization configuration.
|
||||
pub fn new_with_config(wafer_config: WaferConfig) -> anyhow::Result<Self> {
|
||||
let mut config = wasmtime::Config::new();
|
||||
config.cranelift_nan_canonicalization(false);
|
||||
// Best-effort module caching
|
||||
@@ -348,6 +358,8 @@ impl ForthVM {
|
||||
fvalue_words: std::collections::HashSet::new(),
|
||||
float_precision: Arc::new(Mutex::new(6)),
|
||||
ir_bodies: HashMap::new(),
|
||||
config: wafer_config,
|
||||
total_module_bytes: 0,
|
||||
};
|
||||
|
||||
vm.register_primitives()?;
|
||||
@@ -424,6 +436,11 @@ impl ForthVM {
|
||||
stack
|
||||
}
|
||||
|
||||
/// Total WASM module bytes compiled so far.
///
/// This is a running sum kept in the `total_module_bytes` field since the VM
/// was created; this getter only reads it.
|
||||
pub fn total_module_bytes(&self) -> u64 {
|
||||
self.total_module_bytes
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Internal: tokenizer
|
||||
// -----------------------------------------------------------------------
|
||||
@@ -1431,16 +1448,8 @@ impl ForthVM {
|
||||
}
|
||||
|
||||
/// Run all enabled optimization passes on an IR sequence.
|
||||
fn optimize_ir(ir: Vec<IrOp>, bodies: &HashMap<WordId, Vec<IrOp>>) -> Vec<IrOp> {
|
||||
let config = OptConfig {
|
||||
peephole: true,
|
||||
constant_fold: true,
|
||||
tail_call: true,
|
||||
strength_reduce: true,
|
||||
dce: true,
|
||||
inline: true,
|
||||
};
|
||||
optimize(ir, &config, bodies)
|
||||
fn optimize_ir(&self, ir: Vec<IrOp>, bodies: &HashMap<WordId, Vec<IrOp>>) -> Vec<IrOp> {
|
||||
optimize(ir, &self.config.opt, bodies)
|
||||
}
|
||||
|
||||
fn finish_colon_def(&mut self) -> anyhow::Result<()> {
|
||||
@@ -1461,13 +1470,14 @@ impl ForthVM {
|
||||
.ok_or_else(|| anyhow::anyhow!("no word being compiled"))?;
|
||||
let ir = std::mem::take(&mut self.compiling_ir);
|
||||
let bodies = self.ir_bodies.clone();
|
||||
let ir = Self::optimize_ir(ir, &bodies);
|
||||
let ir = self.optimize_ir(ir, &bodies);
|
||||
self.ir_bodies.insert(word_id, ir.clone());
|
||||
|
||||
// Compile to WASM
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled =
|
||||
compile_word(&name, &ir, &config).map_err(|e| anyhow::anyhow!("codegen error: {e}"))?;
|
||||
@@ -1580,6 +1590,7 @@ impl ForthVM {
|
||||
word_id: WordId,
|
||||
) -> anyhow::Result<()> {
|
||||
self.ensure_table_size(word_id.0)?;
|
||||
self.total_module_bytes += compiled.bytes.len() as u64;
|
||||
|
||||
let module = Module::new(&self.engine, &compiled.bytes)?;
|
||||
let instance = Instance::new(
|
||||
@@ -1843,7 +1854,7 @@ impl ForthVM {
|
||||
ir_body: Vec<IrOp>,
|
||||
) -> anyhow::Result<WordId> {
|
||||
let bodies = self.ir_bodies.clone();
|
||||
let ir_body = Self::optimize_ir(ir_body, &bodies);
|
||||
let ir_body = self.optimize_ir(ir_body, &bodies);
|
||||
let word_id = self
|
||||
.dictionary
|
||||
.create(name, immediate)
|
||||
@@ -1853,6 +1864,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word(name, &ir_body, &config)
|
||||
.map_err(|e| anyhow::anyhow!("codegen error for {name}: {e}"))?;
|
||||
@@ -2380,6 +2392,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word(&name, &ir_body, &config)
|
||||
.map_err(|e| anyhow::anyhow!("codegen error for VARIABLE {name}: {e}"))?;
|
||||
@@ -2410,6 +2423,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word(&name, &ir_body, &config)
|
||||
.map_err(|e| anyhow::anyhow!("codegen error for CONSTANT {name}: {e}"))?;
|
||||
@@ -2445,6 +2459,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word(&name, &ir_body, &config)
|
||||
.map_err(|e| anyhow::anyhow!("codegen error for CREATE {name}: {e}"))?;
|
||||
@@ -2490,6 +2505,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word(&name, &ir_body, &config)
|
||||
.map_err(|e| anyhow::anyhow!("codegen error for VALUE {name}: {e}"))?;
|
||||
@@ -2533,6 +2549,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word(&name, &ir_body, &config)
|
||||
.map_err(|e| anyhow::anyhow!("codegen error for DEFER {name}: {e}"))?;
|
||||
@@ -2570,6 +2587,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word(&name, &ir_body, &config)
|
||||
.map_err(|e| anyhow::anyhow!("codegen error for BUFFER: {name}: {e}"))?;
|
||||
@@ -2601,6 +2619,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word(&name, &ir_body, &config)
|
||||
.map_err(|e| anyhow::anyhow!("codegen error for MARKER {name}: {e}"))?;
|
||||
@@ -3838,6 +3857,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let name = self
|
||||
.dictionary
|
||||
@@ -3931,6 +3951,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: second_word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word("_does_action2_", &second_ir, &config)
|
||||
.map_err(|e| anyhow::anyhow!("codegen error for DOES> body 2: {e}"))?;
|
||||
@@ -3981,6 +4002,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: does_word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word("_does_action_", &does_ir, &config)
|
||||
.map_err(|e| anyhow::anyhow!("codegen error for DOES> body: {e}"))?;
|
||||
@@ -4007,6 +4029,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: defining_word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word(&defining_name, &[], &config)
|
||||
.map_err(|e| anyhow::anyhow!("codegen error for defining word: {e}"))?;
|
||||
@@ -4066,6 +4089,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: new_word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word(&name, &ir_body, &config)
|
||||
.map_err(|e| anyhow::anyhow!("codegen: {e}"))?;
|
||||
@@ -4089,6 +4113,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: tmp_word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word("_create_part_", &create_ir, &config)
|
||||
.map_err(|e| anyhow::anyhow!("codegen: {e}"))?;
|
||||
@@ -4101,6 +4126,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: new_word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word(&name, &patched_ir, &config)
|
||||
.map_err(|e| anyhow::anyhow!("DOES> patch codegen: {e}"))?;
|
||||
@@ -4128,6 +4154,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: target_word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word(&name, &patched_ir, &config)
|
||||
.map_err(|e| anyhow::anyhow!("DOES> patch codegen: {e}"))?;
|
||||
@@ -5053,6 +5080,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: target_word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word(&name, &patched_ir, &config)
|
||||
.map_err(|e| anyhow::anyhow!("runtime DOES> patch codegen: {e}"))?;
|
||||
@@ -6589,6 +6617,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word(&name, &ir, &config)
|
||||
.map_err(|e| anyhow::anyhow!("2CONSTANT codegen: {e}"))?;
|
||||
@@ -6618,6 +6647,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word(&name, &ir, &config)
|
||||
.map_err(|e| anyhow::anyhow!("2VARIABLE codegen: {e}"))?;
|
||||
@@ -6660,6 +6690,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word(&name, &ir, &config)
|
||||
.map_err(|e| anyhow::anyhow!("2VALUE codegen: {e}"))?;
|
||||
@@ -7393,7 +7424,8 @@ impl ForthVM {
|
||||
|
||||
let flag: i32 = if result { -1 } else { 0 };
|
||||
let dsp_val = dsp.get(&mut caller).unwrap_i32() as u32;
|
||||
let new_dsp = dsp_val.checked_sub(CELL_SIZE)
|
||||
let new_dsp = dsp_val
|
||||
.checked_sub(CELL_SIZE)
|
||||
.ok_or_else(|| wasmtime::Error::msg("data stack overflow in F~"))?;
|
||||
dsp.set(&mut caller, Val::I32(new_dsp as i32)).unwrap();
|
||||
let mem = memory.data_mut(&mut caller);
|
||||
@@ -8340,6 +8372,7 @@ impl ForthVM {
|
||||
let config = CodegenConfig {
|
||||
base_fn_index: word_id.0,
|
||||
table_size: self.table_size(),
|
||||
stack_to_local_promotion: self.config.codegen.stack_to_local_promotion,
|
||||
};
|
||||
let compiled = compile_word(&name, &ir_body, &config)
|
||||
.map_err(|e| anyhow::anyhow!("codegen error for FVARIABLE {name}: {e}"))?;
|
||||
|
||||
@@ -0,0 +1,496 @@
|
||||
//! Optimization benchmark and verification framework for WAFER.
|
||||
//!
|
||||
//! Run correctness tests: `cargo test -p wafer-core --test benchmark_report`
|
||||
//! Run full benchmark: `cargo test -p wafer-core --test benchmark_report -- --nocapture --ignored`
|
||||
|
||||
use std::time::Instant;
|
||||
use wafer_core::config::WaferConfig;
|
||||
use wafer_core::outer::ForthVM;
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Benchmark definitions
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
/// One Forth benchmark: its word definitions, the phrase that gets timed, and
/// a small correctness check.
struct Benchmark {
|
||||
/// Label used in the printed report and in failure messages.
name: &'static str,
|
||||
/// Forth source defining the words under test; evaluated line by line
/// (blank lines skipped) on a fresh VM before anything else runs.
define: &'static str,
|
||||
/// Forth phrase evaluated once as a warm-up and then once per timed iteration.
run: &'static str,
|
||||
/// Forth phrase evaluated by the correctness test.
verify: &'static str,
|
||||
/// Data stack contents expected after evaluating `verify`.
expected: Vec<i32>,
|
||||
/// Number of timed evaluations of `run`; the median time is reported.
iterations: u32,
|
||||
}
|
||||
|
||||
fn benchmarks() -> Vec<Benchmark> {
|
||||
vec![
|
||||
Benchmark {
|
||||
name: "Fibonacci(25)",
|
||||
define: ": FIB ( n -- n ) DUP 2 < IF EXIT THEN DUP 1- RECURSE SWAP 2 - RECURSE + ;",
|
||||
run: "25 FIB DROP",
|
||||
verify: "25 FIB",
|
||||
expected: vec![75025],
|
||||
iterations: 10,
|
||||
},
|
||||
Benchmark {
|
||||
name: "Factorial(12)",
|
||||
define: ": FACT ( n -- n! ) 1 SWAP 1+ 1 ?DO I * LOOP ;",
|
||||
run: "12 FACT DROP",
|
||||
verify: "12 FACT",
|
||||
expected: vec![479001600],
|
||||
iterations: 1000,
|
||||
},
|
||||
Benchmark {
|
||||
name: "SumRecurse(5000)",
|
||||
define: concat!(
|
||||
": SUMREC ( n -- sum ) ",
|
||||
"DUP 0= IF EXIT THEN ",
|
||||
"DUP 1- RECURSE + ;"
|
||||
),
|
||||
run: "5000 SUMREC DROP",
|
||||
verify: "100 SUMREC",
|
||||
expected: vec![5050],
|
||||
iterations: 100,
|
||||
},
|
||||
Benchmark {
|
||||
name: "NestedLoops(80)",
|
||||
define: ": NESTED ( n -- sum ) 0 SWAP 0 DO I 0 DO I J + DROP LOOP LOOP ;",
|
||||
run: "80 NESTED DROP",
|
||||
verify: "5 NESTED",
|
||||
expected: vec![0],
|
||||
iterations: 10,
|
||||
},
|
||||
Benchmark {
|
||||
name: "GCD-bench(500)",
|
||||
define: concat!(
|
||||
": GCD ( a b -- gcd ) BEGIN DUP WHILE TUCK MOD REPEAT DROP ; ",
|
||||
": GCD-BENCH ( n -- ) 0 DO 10000 I 1+ GCD DROP LOOP ;"
|
||||
),
|
||||
run: "500 GCD-BENCH",
|
||||
verify: "48 36 GCD",
|
||||
expected: vec![12],
|
||||
iterations: 10,
|
||||
},
|
||||
Benchmark {
|
||||
name: "MemFill(1000)",
|
||||
define: concat!(
|
||||
"VARIABLE MBUF ",
|
||||
"1000 CELLS ALLOT ",
|
||||
"HERE 1000 CELLS - MBUF ! ",
|
||||
": MFILL ( n -- ) 0 DO I I * MBUF @ I CELLS + ! LOOP ; ",
|
||||
": MSUM ( n -- sum ) 0 SWAP 0 DO MBUF @ I CELLS + @ + LOOP ;"
|
||||
),
|
||||
run: "1000 MFILL 1000 MSUM DROP",
|
||||
verify: "10 MFILL 10 MSUM",
|
||||
expected: vec![285],
|
||||
iterations: 100,
|
||||
},
|
||||
Benchmark {
|
||||
name: "Collatz(1M)",
|
||||
define: concat!(
|
||||
": COLLATZ ( n -- steps ) ",
|
||||
"0 SWAP BEGIN DUP 1 > WHILE ",
|
||||
"DUP 1 AND IF 3 * 1+ ELSE 2 / THEN ",
|
||||
"SWAP 1+ SWAP ",
|
||||
"REPEAT DROP ; ",
|
||||
": COLLATZ-BENCH ( n -- ) 0 DO I 1+ COLLATZ DROP LOOP ;"
|
||||
),
|
||||
run: "10000 COLLATZ-BENCH",
|
||||
verify: "27 COLLATZ",
|
||||
expected: vec![111],
|
||||
iterations: 5,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Configurations
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
fn individual_configs() -> Vec<(&'static str, WaferConfig)> {
|
||||
vec![
|
||||
("none", WaferConfig::none()),
|
||||
("peephole", {
|
||||
let mut c = WaferConfig::none();
|
||||
c.opt.peephole = true;
|
||||
c
|
||||
}),
|
||||
("constant_fold", {
|
||||
let mut c = WaferConfig::none();
|
||||
c.opt.constant_fold = true;
|
||||
c
|
||||
}),
|
||||
("strength_reduce", {
|
||||
let mut c = WaferConfig::none();
|
||||
c.opt.strength_reduce = true;
|
||||
c
|
||||
}),
|
||||
("dce", {
|
||||
let mut c = WaferConfig::none();
|
||||
c.opt.dce = true;
|
||||
c
|
||||
}),
|
||||
("tail_call", {
|
||||
let mut c = WaferConfig::none();
|
||||
c.opt.tail_call = true;
|
||||
c
|
||||
}),
|
||||
("inline", {
|
||||
let mut c = WaferConfig::none();
|
||||
c.opt.inline = true;
|
||||
c
|
||||
}),
|
||||
("promotion", {
|
||||
let mut c = WaferConfig::none();
|
||||
c.codegen.stack_to_local_promotion = true;
|
||||
c
|
||||
}),
|
||||
("all_ir", {
|
||||
let mut c = WaferConfig::none();
|
||||
c.opt.peephole = true;
|
||||
c.opt.constant_fold = true;
|
||||
c.opt.strength_reduce = true;
|
||||
c.opt.dce = true;
|
||||
c.opt.tail_call = true;
|
||||
c.opt.inline = true;
|
||||
c
|
||||
}),
|
||||
("all", WaferConfig::all()),
|
||||
]
|
||||
}
|
||||
|
||||
fn combination_configs() -> Vec<(String, WaferConfig)> {
|
||||
let mut result = Vec::new();
|
||||
for ir_bits in 0..64u32 {
|
||||
for promo in [false, true] {
|
||||
let mut c = WaferConfig::none();
|
||||
if ir_bits & 1 != 0 {
|
||||
c.opt.peephole = true;
|
||||
}
|
||||
if ir_bits & 2 != 0 {
|
||||
c.opt.constant_fold = true;
|
||||
}
|
||||
if ir_bits & 4 != 0 {
|
||||
c.opt.strength_reduce = true;
|
||||
}
|
||||
if ir_bits & 8 != 0 {
|
||||
c.opt.dce = true;
|
||||
}
|
||||
if ir_bits & 16 != 0 {
|
||||
c.opt.tail_call = true;
|
||||
}
|
||||
if ir_bits & 32 != 0 {
|
||||
c.opt.inline = true;
|
||||
}
|
||||
if promo {
|
||||
c.codegen.stack_to_local_promotion = true;
|
||||
}
|
||||
let name = format!("ir={:06b}{}", ir_bits, if promo { "+P" } else { "" });
|
||||
result.push((name, c));
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Measurement
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
/// Measurements produced by one `run_benchmark` call.
struct BenchResult {
|
||||
/// Wall-clock microseconds spent creating the VM and evaluating the
/// benchmark's definitions.
compile_time_us: u64,
|
||||
/// Median wall-clock microseconds of the timed evaluations of `run`.
exec_time_us: u64,
|
||||
/// Value of `ForthVM::total_module_bytes()` after the timed runs.
module_bytes: u64,
|
||||
}
|
||||
|
||||
fn run_benchmark(config: &WaferConfig, bench: &Benchmark) -> BenchResult {
|
||||
// Compile
|
||||
let compile_start = Instant::now();
|
||||
let mut vm = ForthVM::new_with_config(config.clone()).expect("VM creation failed");
|
||||
for line in bench.define.lines() {
|
||||
let trimmed = line.trim();
|
||||
if !trimmed.is_empty() {
|
||||
let _ = vm.evaluate(trimmed);
|
||||
}
|
||||
}
|
||||
vm.take_output();
|
||||
let compile_time = compile_start.elapsed();
|
||||
|
||||
// Warm up
|
||||
let _ = vm.evaluate(bench.run);
|
||||
vm.take_output();
|
||||
|
||||
// Measure
|
||||
let mut times = Vec::new();
|
||||
for _ in 0..bench.iterations {
|
||||
let start = Instant::now();
|
||||
let _ = vm.evaluate(bench.run);
|
||||
times.push(start.elapsed());
|
||||
vm.take_output();
|
||||
}
|
||||
times.sort();
|
||||
let median = times[times.len() / 2];
|
||||
|
||||
BenchResult {
|
||||
compile_time_us: compile_time.as_micros() as u64,
|
||||
exec_time_us: median.as_micros() as u64,
|
||||
module_bytes: vm.total_module_bytes(),
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Correctness test (runs in CI)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
/// Every benchmark must produce its expected stack under every named
/// configuration. Each (config, benchmark) pair gets its own fresh VM.
#[test]
fn correctness_all_configs() {
    let configs = individual_configs();
    let suite = benchmarks();

    for (cfg_name, config) in &configs {
        for bench in &suite {
            let mut vm = ForthVM::new_with_config(config.clone()).expect("VM creation failed");

            // Define the words under test, skipping blank lines.
            let definition_lines = bench
                .define
                .lines()
                .map(str::trim)
                .filter(|source| !source.is_empty());
            for source in definition_lines {
                if let Err(e) = vm.evaluate(source) {
                    panic!(
                        "Config '{cfg_name}', bench '{}': define failed: {e}",
                        bench.name
                    );
                }
            }
            vm.take_output();

            // Run the verification phrase and compare the resulting stack.
            if let Err(e) = vm.evaluate(bench.verify) {
                panic!(
                    "Config '{cfg_name}', bench '{}': verify failed: {e}",
                    bench.name
                );
            }
            vm.take_output();

            let stack = vm.data_stack();
            assert_eq!(
                stack, bench.expected,
                "Config '{cfg_name}', bench '{}': expected {:?}, got {:?}",
                bench.name, bench.expected, stack
            );
        }
    }
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Benchmark report (run with --nocapture --ignored)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
/// Print the full optimization report (run with `--nocapture --ignored`).
///
/// * Phase 1: every named configuration on every benchmark, relative to the
///   all-off baseline.
/// * Phase 2: all 128 flag combinations on the Fibonacci and GCD benchmarks,
///   tracking the fastest configuration per benchmark.
/// * Phase 3: the all-on configuration with and without `CONSOLIDATE`.
/// * Summary: best configuration found in phase 2.
#[test]
#[ignore]
fn optimization_report() {
    /// Signed percentage change of `value` relative to `base`, or "N/A" when
    /// the base is zero.
    fn pct_change(value: u64, base: u64) -> String {
        if base > 0 {
            format!(
                "{:+.1}%",
                ((value as f64 - base as f64) / base as f64) * 100.0
            )
        } else {
            "N/A".to_string()
        }
    }

    let configs = individual_configs();
    let benches = benchmarks();

    let sep = "=".repeat(90);
    let thin_sep = "-".repeat(90);
    println!("\n{sep}");
    println!(" WAFER Optimization Benchmark Report");
    println!("{sep}\n");

    // ---- Phase 1: Individual optimization impact ----
    println!("Phase 1: Individual Optimization Impact");
    println!("{thin_sep}");
    println!(
        "{:<18} {:<18} {:>10} {:>10} {:>10} {:>10} {:>10}",
        "Config", "Benchmark", "Compile", "Exec", "Bytes", "Exec %", "Bytes %"
    );
    println!(
        "{:<18} {:<18} {:>10} {:>10} {:>10} {:>10} {:>10}",
        "", "", "(us)", "(us)", "", "vs none", "vs none"
    );
    println!("{thin_sep}");

    // Collect baseline (none) results first. The baseline is looked up by
    // name so it does not depend on the ordering of `individual_configs()`.
    let none_config = configs
        .iter()
        .find(|(name, _)| *name == "none")
        .map(|(_, cfg)| cfg)
        .expect("individual_configs() must contain the 'none' baseline");
    let baseline_results: Vec<BenchResult> = benches
        .iter()
        .map(|bench| run_benchmark(none_config, bench))
        .collect();

    // Print all configs
    for (cfg_name, config) in &configs {
        for (bench, baseline) in benches.iter().zip(&baseline_results) {
            // The baseline row reuses the numbers measured above instead of
            // re-running, so it always shows +0.0%.
            let fresh;
            let result = if *cfg_name == "none" {
                baseline
            } else {
                fresh = run_benchmark(config, bench);
                &fresh
            };

            let exec_pct = pct_change(result.exec_time_us, baseline.exec_time_us);
            let bytes_pct = pct_change(result.module_bytes, baseline.module_bytes);

            println!(
                "{:<18} {:<18} {:>10} {:>10} {:>10} {:>10} {:>10}",
                cfg_name,
                bench.name,
                result.compile_time_us,
                result.exec_time_us,
                result.module_bytes,
                exec_pct,
                bytes_pct,
            );
        }
    }

    // ---- Phase 2: Combination matrix (subset of benchmarks for speed) ----
    println!("\n{sep}");
    println!("Phase 2: Combination Matrix (Fibonacci + GCD only)");
    println!("{sep}");

    let combo_configs = combination_configs();
    let combo_bench_indices: Vec<usize> = benches
        .iter()
        .enumerate()
        .filter(|(_, b)| b.name.contains("Fibonacci") || b.name.contains("GCD"))
        .map(|(i, _)| i)
        .collect();

    println!(
        "{:<18} {:<18} {:>10} {:>10} {:>10}",
        "Config", "Benchmark", "Exec(us)", "Exec %", "Bytes"
    );
    println!("{thin_sep}");

    // Fastest (config name, exec time) seen so far per selected benchmark,
    // seeded with the baseline.
    let mut best_exec: Vec<(String, u64)> = combo_bench_indices
        .iter()
        .map(|&i| ("none".to_string(), baseline_results[i].exec_time_us))
        .collect();

    for (cfg_name, config) in &combo_configs {
        for (ci, &bench_idx) in combo_bench_indices.iter().enumerate() {
            let bench = &benches[bench_idx];
            let result = run_benchmark(config, bench);
            let exec_pct = pct_change(
                result.exec_time_us,
                baseline_results[bench_idx].exec_time_us,
            );

            println!(
                "{:<18} {:<18} {:>10} {:>10} {:>10}",
                cfg_name, bench.name, result.exec_time_us, exec_pct, result.module_bytes,
            );

            if result.exec_time_us < best_exec[ci].1 {
                best_exec[ci] = (cfg_name.clone(), result.exec_time_us);
            }
        }
    }

    // ---- Phase 3: CONSOLIDATE comparison ----
    println!("\n{sep}");
    println!("Phase 3: CONSOLIDATE Impact");
    println!("{sep}");
    println!(
        "{:<18} {:<18} {:>10} {:>10} {:>10}",
        "Mode", "Benchmark", "Exec(us)", "vs all", "Bytes"
    );
    println!("{thin_sep}");

    let all_config = WaferConfig::all();
    for bench in &benches {
        // Without CONSOLIDATE
        let result_all = run_benchmark(&all_config, bench);

        // With CONSOLIDATE
        let mut vm_consol =
            ForthVM::new_with_config(all_config.clone()).expect("VM creation failed");
        for line in bench.define.lines().map(str::trim).filter(|l| !l.is_empty()) {
            if let Err(e) = vm_consol.evaluate(line) {
                panic!("bench '{}': define failed: {e}", bench.name);
            }
        }
        vm_consol.take_output();
        // If CONSOLIDATE itself fails the row below measures the plain
        // configuration, so say so instead of silently reporting it.
        if let Err(e) = vm_consol.evaluate("CONSOLIDATE") {
            eprintln!("warning: bench '{}': CONSOLIDATE failed: {e}", bench.name);
        }
        vm_consol.take_output();

        // Warm up
        let _ = vm_consol.evaluate(bench.run);
        vm_consol.take_output();

        let mut times = Vec::with_capacity(bench.iterations as usize);
        for _ in 0..bench.iterations {
            let start = Instant::now();
            let _ = vm_consol.evaluate(bench.run);
            times.push(start.elapsed());
            vm_consol.take_output();
        }
        times.sort_unstable();
        // Upper median; zero when there were no iterations.
        let median = times.get(times.len() / 2).copied().unwrap_or_default();
        let consol_exec = u64::try_from(median.as_micros()).unwrap_or(u64::MAX);
        let consol_bytes = vm_consol.total_module_bytes();

        let exec_pct = pct_change(consol_exec, result_all.exec_time_us);

        println!(
            "{:<18} {:<18} {:>10} {:>10} {:>10}",
            "all", bench.name, result_all.exec_time_us, "+0.0%", result_all.module_bytes,
        );
        println!(
            "{:<18} {:<18} {:>10} {:>10} {:>10}",
            "all+CONSOLIDATE", bench.name, consol_exec, exec_pct, consol_bytes,
        );
    }

    // ---- Summary ----
    println!("\n{sep}");
    println!(" Summary");
    println!("{sep}");
    for (ci, &bench_idx) in combo_bench_indices.iter().enumerate() {
        let bench = &benches[bench_idx];
        let base = baseline_results[bench_idx].exec_time_us;
        // Unsigned "how much faster" figure, hence not `pct_change`.
        let improvement = if base > 0 {
            format!(
                "{:.1}%",
                ((base as f64 - best_exec[ci].1 as f64) / base as f64) * 100.0
            )
        } else {
            "N/A".to_string()
        };
        println!(
            " {}: best config '{}' ({} us, {} faster than none)",
            bench.name, best_exec[ci].0, best_exec[ci].1, improvement
        );
    }
    println!();
    println!(" Recommendation: Use WaferConfig::all() for best overall performance.");
    println!(" CONSOLIDATE provides additional speedup for compute-heavy words.");
    println!("{sep}\n");
}
|
||||
Reference in New Issue
Block a user