WAFER/crates/core/tests/comparison.rs

#![allow(dead_code)]
//! Cross-engine comparison tests: WAFER vs gforth.
//!
//! Validates that WAFER produces identical output to gforth for standard
//! Forth programs, and benchmarks performance of both engines.
//!
//! WAFER-only correctness:  `cargo test -p wafer-core --test comparison`
//! Full comparison + perf:  `cargo test -p wafer-core --test comparison -- --nocapture --ignored`

use std::process::Command;
use std::sync::OnceLock;

use wafer_core::config::WaferConfig;
use wafer_core::outer::ForthVM;

// -----------------------------------------------------------------------
// Gforth discovery (cached)
// -----------------------------------------------------------------------

/// Cached result of the one-time search performed by `find_gforth`
/// (`Some(path)` when a working `gforth` was found, `None` otherwise).
static GFORTH_PATH: OnceLock<Option<String>> = OnceLock::new();
/// Cached result of the one-time search performed by `find_gforth_fast`
/// (`Some(path)` when a working `gforth-fast` was found, `None` otherwise).
static GFORTH_FAST_PATH: OnceLock<Option<String>> = OnceLock::new();

/// Return `true` when `candidate` can be launched as `candidate -e bye` and
/// exits successfully; a spawn failure or non-zero exit yields `false`.
fn probe_gforth(candidate: &str) -> bool {
    let attempt = Command::new(candidate).args(["-e", "bye"]).output();
    match attempt {
        Ok(finished) => finished.status.success(),
        Err(_) => false,
    }
}

/// Locate a usable `gforth` executable.
///
/// The candidate locations are probed in order the first time this is called;
/// the outcome (including "not found") is cached in `GFORTH_PATH`.
fn find_gforth() -> Option<&'static str> {
    const CANDIDATES: [&str; 3] = ["/opt/homebrew/bin/gforth", "/usr/local/bin/gforth", "gforth"];

    let cached = GFORTH_PATH.get_or_init(|| {
        CANDIDATES
            .iter()
            .copied()
            .find(|path| probe_gforth(path))
            .map(String::from)
    });
    cached.as_deref()
}

/// Locate a usable `gforth-fast` executable.
///
/// The candidate locations are probed in order the first time this is called;
/// the outcome (including "not found") is cached in `GFORTH_FAST_PATH`.
fn find_gforth_fast() -> Option<&'static str> {
    const CANDIDATES: [&str; 3] = [
        "/opt/homebrew/bin/gforth-fast",
        "/usr/local/bin/gforth-fast",
        "gforth-fast",
    ];

    let cached = GFORTH_FAST_PATH.get_or_init(|| {
        CANDIDATES
            .iter()
            .copied()
            .find(|path| probe_gforth(path))
            .map(String::from)
    });
    cached.as_deref()
}

// -----------------------------------------------------------------------
// Engine runners
// -----------------------------------------------------------------------

/// Outcome of running one Forth program through an engine.
struct EngineResult {
    /// Text the engine produced: collected VM output for WAFER, captured
    /// stdout for gforth.
    output: String,
    /// `true` when the WAFER lines evaluated without error, or when the
    /// gforth process exited successfully.
    success: bool,
}

/// Run Forth code through WAFER (in-process via `ForthVM`).
fn run_wafer(code: &str) -> EngineResult {
    let mut vm = ForthVM::new().expect("Failed to create ForthVM");
    let mut output = String::new();
    for line in code.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }
        match vm.evaluate(trimmed) {
            Ok(()) => output.push_str(&vm.take_output()),
            Err(_) => {
                return EngineResult {
                    output,
                    success: false,
                }
            }
        }
    }
    EngineResult {
        output,
        success: true,
    }
}

/// Run Forth code through WAFER with all optimizations enabled.
fn run_wafer_optimized(code: &str) -> EngineResult {
    let mut vm = ForthVM::new_with_config(WaferConfig::all()).expect("Failed to create ForthVM");
    let mut output = String::new();
    for line in code.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }
        match vm.evaluate(trimmed) {
            Ok(()) => output.push_str(&vm.take_output()),
            Err(_) => {
                return EngineResult {
                    output,
                    success: false,
                }
            }
        }
    }
    EngineResult {
        output,
        success: true,
    }
}

/// Run Forth code through the gforth executable at `gforth`.
///
/// The program is flattened to a single line (blank lines dropped, each line
/// trimmed) and passed via `-e`. A terminating `bye` is appended unless the
/// program's last whitespace-separated word already is `bye` in any letter
/// case, so a trailing word that merely ends in "bye" no longer suppresses it.
///
/// Returns `None` if the process could not be launched; otherwise the captured
/// stdout and whether the process exited successfully.
fn run_gforth_engine(gforth: &str, code: &str) -> Option<EngineResult> {
    let mut script = code
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join(" ");

    // Forth source is whitespace-delimited, so compare the last *word*, not a
    // raw string suffix.
    let already_exits = script
        .split_whitespace()
        .next_back()
        .is_some_and(|word| word.eq_ignore_ascii_case("bye"));
    if !already_exits {
        script.push_str(" bye");
    }

    let finished = Command::new(gforth).arg("-e").arg(&script).output().ok()?;
    Some(EngineResult {
        output: String::from_utf8_lossy(&finished.stdout).into_owned(),
        success: finished.status.success(),
    })
}

/// Run `code` through the cached `gforth`, or return `None` when no gforth
/// was found or it could not be launched.
fn run_gforth(code: &str) -> Option<EngineResult> {
    find_gforth().and_then(|engine| run_gforth_engine(engine, code))
}

/// Run `code` through the cached `gforth-fast`, or return `None` when no
/// gforth-fast was found or it could not be launched.
fn run_gforth_fast(code: &str) -> Option<EngineResult> {
    find_gforth_fast().and_then(|engine| run_gforth_engine(engine, code))
}

// -----------------------------------------------------------------------
// Output normalization
// -----------------------------------------------------------------------

/// Canonicalize engine output before comparing it.
///
/// Trailing whitespace is stripped from every line and all trailing blank
/// lines are collapsed, so the result either ends in exactly one `'\n'` or is
/// empty when there is no content at all.
fn normalize(s: &str) -> String {
    let mut cleaned = String::with_capacity(s.len() + 1);
    for line in s.lines() {
        cleaned.push_str(line.trim_end());
        cleaned.push('\n');
    }

    let content_len = cleaned.trim_end_matches('\n').len();
    if content_len == 0 {
        return String::new();
    }
    // Every line was terminated above, so one newline is known to follow the
    // content; keep exactly that one.
    cleaned.truncate(content_len + 1);
    cleaned
}

// -----------------------------------------------------------------------
// Assertion helpers
// -----------------------------------------------------------------------

/// Assert that WAFER produces the expected output for a program.
fn assert_wafer_output(name: &str, code: &str, expected: &str) {
    let result = run_wafer(code);
    assert!(result.success, "{name}: WAFER execution failed");
    assert_eq!(
        normalize(&result.output),
        normalize(expected),
        "{name}: WAFER output mismatch\n  got:      {:?}\n  expected: {:?}",
        result.output,
        expected
    );
}

/// Run `code` through both WAFER and gforth and panic unless both succeed
/// with identical normalized output.
///
/// When gforth cannot be run, a SKIP note is printed to stderr and the check
/// is abandoned — but only after the WAFER run itself has been asserted.
fn assert_same_output(name: &str, code: &str) {
    let wafer = run_wafer(code);
    assert!(wafer.success, "{name}: WAFER execution failed");

    let gforth = match run_gforth(code) {
        Some(result) => result,
        None => {
            eprintln!("  SKIP {name}: gforth not available");
            return;
        }
    };
    assert!(gforth.success, "{name}: gforth execution failed");

    let wafer_text = normalize(&wafer.output);
    let gforth_text = normalize(&gforth.output);
    assert_eq!(
        wafer_text, gforth_text,
        "{name}: output differs\n  WAFER:  {:?}\n  gforth: {:?}",
        wafer.output, gforth.output
    );
}

// -----------------------------------------------------------------------
// Test program catalog
// -----------------------------------------------------------------------

/// Grouping label for catalog programs; `run_category` uses it to select the
/// programs exercised by each per-category test.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Category {
    Arithmetic,
    StackOps,
    ControlFlow,
    Loops,
    Definitions,
    Strings,
    Recursion,
    Memory,
}

/// One entry of the test-program catalog.
struct Program {
    /// Short label used in assertion and skip messages.
    name: &'static str,
    /// Forth source; may span several lines.
    code: &'static str,
    /// Output WAFER must produce (compared after `normalize`).
    expected: &'static str,
    /// Category used to group the program into a test.
    category: Category,
}

fn programs() -> Vec<Program> {
    vec![
        // -- Arithmetic --
        Program {
            name: "add",
            code: "2 3 + . CR",
            expected: "5 \n",
            category: Category::Arithmetic,
        },
        Program {
            name: "subtract",
            code: "10 3 - . CR",
            expected: "7 \n",
            category: Category::Arithmetic,
        },
        Program {
            name: "multiply",
            code: "6 7 * . CR",
            expected: "42 \n",
            category: Category::Arithmetic,
        },
        Program {
            name: "divide",
            code: "100 7 / . CR",
            expected: "14 \n",
            category: Category::Arithmetic,
        },
        Program {
            name: "mod",
            code: "100 7 MOD . CR",
            expected: "2 \n",
            category: Category::Arithmetic,
        },
        Program {
            name: "negate",
            code: "7 NEGATE . CR",
            expected: "-7 \n",
            category: Category::Arithmetic,
        },
        Program {
            name: "abs",
            code: "5 ABS . CR -5 ABS . CR",
            expected: "5 \n5 \n",
            category: Category::Arithmetic,
        },
        Program {
            name: "min-max",
            code: "3 7 MIN . CR 3 7 MAX . CR",
            expected: "3 \n7 \n",
            category: Category::Arithmetic,
        },
        Program {
            name: "divmod",
            code: "100 7 /MOD . . CR",
            expected: "14 2 \n",
            category: Category::Arithmetic,
        },
        // -- Stack operations --
        Program {
            name: "swap",
            code: "1 2 SWAP . . CR",
            expected: "1 2 \n",
            category: Category::StackOps,
        },
        Program {
            name: "dup",
            code: "5 DUP . . CR",
            expected: "5 5 \n",
            category: Category::StackOps,
        },
        Program {
            name: "over",
            code: "1 2 OVER . . . CR",
            expected: "1 2 1 \n",
            category: Category::StackOps,
        },
        Program {
            name: "rot",
            code: "1 2 3 ROT . . . CR",
            expected: "1 3 2 \n",
            category: Category::StackOps,
        },
        Program {
            name: "2dup",
            code: "1 2 2DUP . . . . CR",
            expected: "2 1 2 1 \n",
            category: Category::StackOps,
        },
        Program {
            name: "depth",
            code: "1 2 3 DEPTH . DROP DROP DROP CR",
            expected: "3 \n",
            category: Category::StackOps,
        },
        // -- Control flow --
        Program {
            name: "if-else",
            code: ": SGN DUP 0> IF DROP 1 ELSE DUP 0< IF DROP -1 ELSE DROP 0 THEN THEN ;\n\
                   5 SGN . CR -3 SGN . CR 0 SGN . CR",
            expected: "1 \n-1 \n0 \n",
            category: Category::ControlFlow,
        },
        Program {
            name: "max-word",
            code: ": MAX2 2DUP < IF SWAP THEN DROP ;\n\
                   3 7 MAX2 . CR 9 2 MAX2 . CR",
            expected: "7 \n9 \n",
            category: Category::ControlFlow,
        },
        Program {
            name: "abs-word",
            code: ": MYABS DUP 0< IF NEGATE THEN ;\n\
                   -5 MYABS . CR 3 MYABS . CR 0 MYABS . CR",
            expected: "5 \n3 \n0 \n",
            category: Category::ControlFlow,
        },
        // -- Loops --
        Program {
            name: "do-loop",
            code: ": SUM10 0 10 0 DO I + LOOP ; SUM10 . CR",
            expected: "45 \n",
            category: Category::Loops,
        },
        Program {
            name: "do-loop-emit",
            code: ": COUNTDOWN 5 0 DO I . LOOP CR ; COUNTDOWN",
            expected: "0 1 2 3 4 \n",
            category: Category::Loops,
        },
        Program {
            name: "plus-loop",
            code: ": SUM-EVEN 0 10 0 DO I + 2 +LOOP ; SUM-EVEN . CR",
            expected: "20 \n",
            category: Category::Loops,
        },
        Program {
            name: "begin-until",
            code: ": COUNT-DOWN 5 BEGIN DUP . 1- DUP 0= UNTIL DROP CR ; COUNT-DOWN",
            expected: "5 4 3 2 1 \n",
            category: Category::Loops,
        },
        Program {
            name: "begin-while-repeat",
            code: ": COUNT-UP 0 BEGIN DUP 5 < WHILE DUP . 1+ REPEAT DROP CR ; COUNT-UP",
            expected: "0 1 2 3 4 \n",
            category: Category::Loops,
        },
        // -- Definitions --
        Program {
            name: "variable",
            code: "VARIABLE X 42 X ! X @ . CR",
            expected: "42 \n",
            category: Category::Definitions,
        },
        Program {
            name: "constant",
            code: "7 CONSTANT SEVEN SEVEN . CR",
            expected: "7 \n",
            category: Category::Definitions,
        },
        Program {
            name: "colon-def",
            code: ": SQUARE DUP * ; 6 SQUARE . CR 11 SQUARE . CR",
            expected: "36 \n121 \n",
            category: Category::Definitions,
        },
        Program {
            name: "create-does",
            code: ": CONST CREATE , DOES> @ ;\n\
                   99 CONST NINETY-NINE\n\
                   NINETY-NINE . CR",
            expected: "99 \n",
            category: Category::Definitions,
        },
        // -- Strings --
        Program {
            name: "s-quote-type",
            code: "S\" hello\" TYPE CR",
            expected: "hello\n",
            category: Category::Strings,
        },
        Program {
            name: "dot-quote",
            code: ".\" world\" CR",
            expected: "world\n",
            category: Category::Strings,
        },
        Program {
            name: "char-emit",
            code: ": EMIT-AB [CHAR] A EMIT [CHAR] B EMIT ; EMIT-AB CR",
            expected: "AB\n",
            category: Category::Strings,
        },
        // -- Recursion --
        Program {
            name: "fibonacci",
            code: ": FIB DUP 2 < IF EXIT THEN DUP 1- RECURSE SWAP 2 - RECURSE + ;\n\
                   25 FIB . CR",
            expected: "75025 \n",
            category: Category::Recursion,
        },
        Program {
            name: "factorial",
            code: ": FACT 1 SWAP 1+ 1 ?DO I * LOOP ; 12 FACT . CR",
            expected: "479001600 \n",
            category: Category::Recursion,
        },
        Program {
            name: "gcd",
            code: ": GCD BEGIN DUP WHILE TUCK MOD REPEAT DROP ; 48 36 GCD . CR",
            expected: "12 \n",
            category: Category::Recursion,
        },
        // -- Memory --
        Program {
            name: "create-allot",
            code: "CREATE ARR 5 CELLS ALLOT\n\
                   99 ARR 3 CELLS + !\n\
                   ARR 3 CELLS + @ . CR",
            expected: "99 \n",
            category: Category::Memory,
        },
        Program {
            name: "fill-sum",
            code: "CREATE BUF 10 CELLS ALLOT\n\
                   : FILL-BUF 10 0 DO I I * BUF I CELLS + ! LOOP ;\n\
                   : SUM-BUF 0 10 0 DO BUF I CELLS + @ + LOOP ;\n\
                   FILL-BUF SUM-BUF . CR",
            expected: "285 \n",
            category: Category::Memory,
        },
    ]
}

// -----------------------------------------------------------------------
// WAFER-only correctness tests (always run in CI)
// -----------------------------------------------------------------------

/// Check every catalog program belonging to `cat` against its expected WAFER
/// output, panicking on the first mismatch.
fn run_category(cat: Category) {
    programs()
        .into_iter()
        .filter(|entry| entry.category == cat)
        .for_each(|entry| assert_wafer_output(entry.name, entry.code, entry.expected));
}

/// Generates one `#[test]` per `test_name => CategoryVariant` pair; each
/// generated test runs that category's catalog programs via `run_category`.
macro_rules! category_tests {
    ($($test_name:ident => $variant:ident),+ $(,)?) => {
        $(
            #[test]
            fn $test_name() {
                run_category(Category::$variant);
            }
        )+
    };
}

category_tests! {
    wafer_arithmetic => Arithmetic,
    wafer_stack_ops => StackOps,
    wafer_control_flow => ControlFlow,
    wafer_loops => Loops,
    wafer_definitions => Definitions,
    wafer_strings => Strings,
    wafer_recursion => Recursion,
    wafer_memory => Memory,
}

/// Every catalog program must succeed, and print the same normalized output,
/// with the default configuration and with all optimizations enabled.
#[test]
fn wafer_optimized_matches_unoptimized() {
    for Program { name, code, .. } in programs() {
        let unoptimized = run_wafer(code);
        let optimized = run_wafer_optimized(code);

        assert!(unoptimized.success, "{name}: unoptimized failed");
        assert!(optimized.success, "{name}: optimized failed");

        let plain_text = normalize(&unoptimized.output);
        let optimized_text = normalize(&optimized.output);
        assert_eq!(
            plain_text, optimized_text,
            "{name}: optimized output differs from unoptimized"
        );
    }
}

// -----------------------------------------------------------------------
// Cross-engine behavioral comparison (requires gforth)
// -----------------------------------------------------------------------

/// Run the whole catalog through both WAFER and gforth and require identical
/// normalized output.
///
/// A WAFER failure or an output mismatch fails the test. A program gforth
/// cannot run is counted as skipped rather than failed. A summary line is
/// printed to stderr at the end.
#[test]
#[ignore = "requires gforth installation"]
fn compare_all_programs() {
    if find_gforth().is_none() {
        eprintln!("SKIP: gforth not found in PATH");
        return;
    }

    let catalog = programs();
    let (mut passed, mut skipped) = (0, 0);

    for prog in &catalog {
        let wafer = run_wafer(prog.code);
        assert!(wafer.success, "{}: WAFER execution failed", prog.name);

        let gforth = match run_gforth(prog.code) {
            Some(result) if result.success => result,
            Some(_) => {
                eprintln!("  WARN {}: gforth execution failed, skipping", prog.name);
                skipped += 1;
                continue;
            }
            None => {
                skipped += 1;
                continue;
            }
        };

        let wafer_text = normalize(&wafer.output);
        let gforth_text = normalize(&gforth.output);
        assert_eq!(
            wafer_text, gforth_text,
            "{}: output differs\n  WAFER:  {:?}\n  gforth: {:?}",
            prog.name, wafer.output, gforth.output
        );
        passed += 1;
    }

    eprintln!(
        "\nBehavioral comparison: {passed} passed, {skipped} skipped (of {})",
        catalog.len()
    );
}

// -----------------------------------------------------------------------
// Performance comparison (requires gforth)
// -----------------------------------------------------------------------

/// One timed workload for the performance report.
struct PerfBenchmark {
    /// Label shown in the report's first column.
    name: &'static str,
    /// Forth definitions evaluated before the workload is run.
    define: &'static str,
    /// The workload to time — should include its own iteration loop for
    /// fast operations so that total execution time is measurable.
    run_code: &'static str,
    /// Forth snippet evaluated in-process to sanity-check the definitions.
    verify: &'static str,
    /// Value `verify` must leave on the data stack.
    expected: i32,
    /// Number of runs for WAFER median.
    /// NOTE(review): nothing visible in this file reads this field — the
    /// timing scripts always run the workload three times.
    samples: u32,
}

/// Build the list of timed workloads, in report order.
///
/// Each table row is `(name, define, run_code, verify, expected, samples)`.
fn perf_benchmarks() -> Vec<PerfBenchmark> {
    const TABLE: &[(&str, &str, &str, &str, i32, u32)] = &[
        (
            "Fibonacci(25)",
            ": FIB DUP 2 < IF EXIT THEN DUP 1- RECURSE SWAP 2 - RECURSE + ;",
            "25 FIB DROP",
            "25 FIB",
            75025,
            5,
        ),
        (
            "Factorial(12)x10K",
            concat!(
                ": FACT 1 SWAP 1+ 1 ?DO I * LOOP ; ",
                ": FACT-BENCH 10000 0 DO 12 FACT DROP LOOP ;",
            ),
            "FACT-BENCH",
            "12 FACT",
            479001600,
            5,
        ),
        (
            "GCD-bench(500)",
            concat!(
                ": GCD BEGIN DUP WHILE TUCK MOD REPEAT DROP ; ",
                ": GCD-BENCH 0 DO 10000 I 1+ GCD DROP LOOP ;",
            ),
            "500 GCD-BENCH",
            "48 36 GCD",
            12,
            5,
        ),
        (
            "NestedLoops(50)",
            concat!(
                ": NESTED 0 SWAP 0 DO I 0 ?DO I J + DROP LOOP LOOP ; ",
                ": NESTED-BENCH 100 0 DO 50 NESTED DROP LOOP ;",
            ),
            "NESTED-BENCH",
            "5 NESTED",
            0,
            3,
        ),
        (
            "Collatz(2K)",
            concat!(
                ": COLLATZ 0 SWAP BEGIN DUP 1 > WHILE ",
                "DUP 1 AND IF 3 * 1+ ELSE 2 / THEN ",
                "SWAP 1+ SWAP REPEAT DROP ; ",
                ": COLLATZ-BENCH 0 DO I 1+ COLLATZ DROP LOOP ;",
            ),
            "2000 COLLATZ-BENCH",
            "27 COLLATZ",
            111,
            3,
        ),
    ];

    TABLE
        .iter()
        .map(
            |&(name, define, run_code, verify, expected, samples)| PerfBenchmark {
                name,
                define,
                run_code,
                verify,
                expected,
                samples,
            },
        )
        .collect()
}

/// Build the WAFER release binary and return its path.
///
/// Runs `cargo build --release -p wafer` from the workspace root (two levels
/// above this crate's manifest directory) and then looks for the executable
/// under `<target>/release/`. `<target>` is `CARGO_TARGET_DIR` when that
/// variable is set, otherwise `target`; a relative value is resolved against
/// the workspace root, which is also the directory the build ran in. A target
/// directory configured by other means is not taken into account.
///
/// Returns `None` if the workspace root cannot be derived, cargo cannot be
/// launched, the build fails (its stderr is echoed as a warning), or the
/// expected executable is missing.
fn build_wafer_release() -> Option<String> {
    let workspace_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
        .parent()?
        .parent()?;

    let build = Command::new("cargo")
        .args(["build", "--release", "-p", "wafer"])
        .current_dir(workspace_root)
        .output()
        .ok()?;
    if !build.status.success() {
        eprintln!(
            "WARN: cargo build --release failed: {}",
            String::from_utf8_lossy(&build.stderr)
        );
        return None;
    }

    // `var_os` keeps a non-UTF-8 target directory instead of silently falling
    // back to the default, and `join` leaves an absolute path untouched.
    let target_dir = match std::env::var_os("CARGO_TARGET_DIR") {
        Some(dir) => workspace_root.join(dir),
        None => workspace_root.join("target"),
    };

    // Executables carry a platform suffix (".exe" on Windows, empty on Unix).
    let binary = target_dir
        .join("release")
        .join(format!("wafer{}", std::env::consts::EXE_SUFFIX));

    binary
        .is_file()
        .then(|| binary.to_string_lossy().into_owned())
}

static WAFER_RELEASE: OnceLock<Option<String>> = OnceLock::new();

fn find_wafer_release() -> Option<&'static str> {
    WAFER_RELEASE
        .get_or_init(|| build_wafer_release())
        .as_deref()
}

/// Extract the timings an engine printed and return their median.
///
/// Every stdout line that parses as an unsigned integer (after trimming) is
/// taken as one timing; all other lines are ignored. For an even count the
/// upper of the two middle values is returned. `None` when no line parses.
fn median_reported_time(stdout: &[u8]) -> Option<u64> {
    let text = String::from_utf8_lossy(stdout);
    let mut times: Vec<u64> = text
        .lines()
        .filter_map(|line| line.trim().parse().ok())
        .collect();
    times.sort_unstable();
    times.get(times.len() / 2).copied()
}

/// Pipe `code` into the WAFER binary's stdin and return its captured stdout.
///
/// The child is always waited on, even when writing to its stdin fails, so no
/// unreaped process is left behind. Returns `None` if the process cannot be
/// spawned, the write fails, or the process exits unsuccessfully.
fn pipe_through_wafer(wafer: &str, code: &str) -> Option<Vec<u8>> {
    use std::io::Write;
    use std::process::Stdio;

    let mut child = Command::new(wafer)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .ok()?;

    // The handle is moved into the closure and dropped when it returns, which
    // closes the pipe so the interpreter sees end-of-input.
    let written = child
        .stdin
        .take()
        .map(|mut stdin| stdin.write_all(code.as_bytes()));
    let finished = child.wait_with_output().ok()?;

    match written {
        Some(Ok(())) if finished.status.success() => Some(finished.stdout),
        _ => None,
    }
}

/// Measure WAFER execution time using a release-mode binary with UTIME.
/// Same approach as gforth: Forth-level timing excludes startup.
///
/// The workload runs once untimed, then three times inside `TIMED-BENCH`;
/// the median of the printed values is returned, or `None` on any failure.
fn measure_wafer_release(wafer: &str, bench: &PerfBenchmark) -> Option<u64> {
    let code = format!(
        "{define} {run} \
         : TIMED-BENCH UTIME {run} UTIME 2SWAP D- DROP . CR ; \
         TIMED-BENCH TIMED-BENCH TIMED-BENCH",
        define = bench.define,
        run = bench.run_code,
    );
    let stdout = pipe_through_wafer(wafer, &code)?;
    median_reported_time(&stdout)
}

/// Measure WAFER execution time after CONSOLIDATE (direct calls between all words).
///
/// Identical to `measure_wafer_release` except that `CONSOLIDATE` is executed
/// right after the benchmark's definitions.
fn measure_wafer_consolidated(wafer: &str, bench: &PerfBenchmark) -> Option<u64> {
    let code = format!(
        "{define} CONSOLIDATE {run} \
         : TIMED-BENCH UTIME {run} UTIME 2SWAP D- DROP . CR ; \
         TIMED-BENCH TIMED-BENCH TIMED-BENCH",
        define = bench.define,
        run = bench.run_code,
    );
    let stdout = pipe_through_wafer(wafer, &code)?;
    median_reported_time(&stdout)
}

/// Measure gforth execution time using Forth-level `utime` (excludes startup).
/// Both engines run the exact same `run_code`, so the comparison is apples-to-apples.
/// Returns microseconds, or None if gforth is unavailable.
fn measure_gforth(gforth: &str, bench: &PerfBenchmark) -> Option<u64> {
    // The timing wrapper must be inside a word (DO/LOOP is compile-only in gforth).
    // We take the median of 3 runs.
    let code = format!(
        "{define} {run} \
         : TIMED-BENCH utime {run} utime 2swap d- drop . CR ; \
         TIMED-BENCH TIMED-BENCH TIMED-BENCH bye",
        define = bench.define,
        run = bench.run_code,
    );
    let finished = Command::new(gforth).arg("-e").arg(&code).output().ok()?;
    if !finished.status.success() {
        return None;
    }
    median_reported_time(&finished.stdout)
}

/// Print a timing table comparing WAFER (plain and after CONSOLIDATE) with
/// gforth and, when available, gforth-fast.
///
/// The test skips when gforth is missing, before the WAFER release build is
/// attempted. Each benchmark's definitions are first checked in-process; a
/// definition or verification error, or a wrong result, fails the test. A
/// timing that could not be obtained is shown as `-`, and the ratio column is
/// only computed when both a WAFER and a gforth timing exist.
#[test]
#[ignore = "requires gforth installation"]
fn performance_report() {
    let Some(gforth) = find_gforth() else {
        eprintln!("SKIP: gforth not found");
        return;
    };
    let gforth_fast = find_gforth_fast();
    let wafer_release = find_wafer_release();
    if wafer_release.is_none() {
        eprintln!("WARN: could not build WAFER release binary, WAFER timings will be shown as '-'");
    }

    let benchmarks = perf_benchmarks();

    // Verify correctness first
    for bench in &benchmarks {
        let mut vm = ForthVM::new().expect("VM creation failed");
        for line in bench.define.lines().map(str::trim).filter(|l| !l.is_empty()) {
            // A broken definition would make the timings meaningless, so fail
            // loudly instead of ignoring the error.
            vm.evaluate(line)
                .unwrap_or_else(|e| panic!("{}: define failed: {e}", bench.name));
        }
        vm.take_output();
        vm.evaluate(bench.verify)
            .unwrap_or_else(|e| panic!("{}: verify failed: {e}", bench.name));
        vm.take_output();
        let stack = vm.data_stack();
        // NOTE(review): relies on the first element of `data_stack()` being the
        // value `verify` left behind — confirm against the ForthVM API.
        assert_eq!(
            stack.first().copied().unwrap_or(-1),
            bench.expected,
            "{}: wrong result",
            bench.name
        );
    }

    let sep = "=".repeat(80);
    let thin = "-".repeat(80);
    println!("\n{sep}");
    println!("  WAFER vs Gforth Performance Comparison (release mode)");
    println!("{sep}\n");
    println!(
        "{:<22} {:>10} {:>10} {:>10} {:>10} {:>10}",
        "Benchmark", "WAFER", "CONSOL", "gforth", "gf-fast", "WAFER/gf"
    );
    println!(
        "{:<22} {:>10} {:>10} {:>10} {:>10} {:>10}",
        "", "(us)", "(us)", "(us)", "(us)", ""
    );
    println!("{thin}");

    // Render a timing, or "-" when it could not be measured.
    let cell = |value: Option<u64>| value.map_or_else(|| "-".to_string(), |v| v.to_string());

    for bench in &benchmarks {
        let wafer = wafer_release.and_then(|w| measure_wafer_release(w, bench));
        let consol = wafer_release.and_then(|w| measure_wafer_consolidated(w, bench));
        let gf = measure_gforth(gforth, bench);
        let gf_fast = gforth_fast.and_then(|g| measure_gforth(g, bench));

        // Fastest WAFER timing that was actually obtained, if any.
        let best_wafer = wafer.into_iter().chain(consol).min();
        let ratio = match (best_wafer, gf) {
            // `as f64` is only for display; microsecond counts fit comfortably.
            (Some(w), Some(g)) if g > 0 => format!("{:.2}x", w as f64 / g as f64),
            _ => "-".to_string(),
        };

        println!(
            "{:<22} {:>10} {:>10} {:>10} {:>10} {:>10}",
            bench.name,
            cell(wafer),
            cell(consol),
            cell(gf),
            cell(gf_fast),
            ratio
        );
    }

    println!("{thin}");
    println!("  WAFER = all optimizations, CONSOL = after CONSOLIDATE");
    println!("  WAFER/gf = best(WAFER,CONSOL) vs gforth, < 1.0 means WAFER faster");
    println!("{sep}\n");
}