diff --git a/crates/core/tests/comparison.rs b/crates/core/tests/comparison.rs
new file mode 100644
index 0000000..9f7c799
--- /dev/null
+++ b/crates/core/tests/comparison.rs
@@ -0,0 +1,772 @@
+#![allow(dead_code)]
+//! Cross-engine comparison tests: WAFER vs gforth.
+//!
+//! Validates that WAFER produces identical output to gforth for standard
+//! Forth programs, and benchmarks performance of both engines.
+//!
+//! - WAFER-only correctness: `cargo test -p wafer-core --test comparison`
+//! - Full comparison + perf: `cargo test -p wafer-core --test comparison -- --nocapture --ignored`
+
+use std::process::Command;
+use std::sync::OnceLock;
+use std::time::Instant;
+
+use wafer_core::config::WaferConfig;
+use wafer_core::outer::ForthVM;
+
+// -----------------------------------------------------------------------
+// Gforth discovery (cached)
+// -----------------------------------------------------------------------
+
+static GFORTH_PATH: OnceLock