From 0fda7e6fe83eee4eb2887747146c9d4bbabacab3 Mon Sep 17 00:00:00 2001 From: Oleksandr Kozachuk Date: Tue, 14 Apr 2026 22:07:00 +0200 Subject: [PATCH] Add extensible hash primitives: SHA1, SHA256, SHA512 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces a `crypto` feature (on by default) that wires the RustCrypto sha1/sha2 crates into a small `HashAlgo` registry. `register_primitives` iterates `crypto::ALGOS` and installs one Forth host word per algorithm, each with the stack effect ( c-addr u -- c-addr2 u2 ) reading `u` bytes from `c-addr` and writing the digest into a shared `HASH_SCRATCH` region in linear memory (carved out between the float stack and the dictionary). Adding a new hash is a one-line entry in `ALGOS`. `register_host_primitive` is now `pub` so downstream crates can extend the VM with their own I/O host words without forking WAFER — kelvar (a deterministic password manager on WAFER) is the first consumer. - 4 unit tests (lib-level sha1/256/512 + registry sanity) - 5 integration tests (in-VM `SHA1`/`SHA256`/`SHA512` against RFC-3174, FIPS-180, and the first-round S/KEY seed used by `hel`) - All 437 existing lib tests still pass; `wafer-web` still builds for `wasm32-unknown-unknown` with the feature enabled --- Cargo.lock | 89 +++++++++++++++++++++++++++-- Cargo.toml | 2 + crates/core/Cargo.toml | 5 +- crates/core/src/crypto.rs | 111 ++++++++++++++++++++++++++++++++++++ crates/core/src/lib.rs | 2 + crates/core/src/memory.rs | 11 +++- crates/core/src/outer.rs | 56 +++++++++++++++++- crates/core/tests/crypto.rs | 98 +++++++++++++++++++++++++++++++ crates/web/Cargo.toml | 2 +- 9 files changed, 365 insertions(+), 11 deletions(-) create mode 100644 crates/core/src/crypto.rs create mode 100644 crates/core/tests/crypto.rs diff --git a/Cargo.lock b/Cargo.lock index 462a9d0..8f9b83e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -141,6 +141,15 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block-buffer" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be" +dependencies = [ + "hybrid-array", +] + [[package]] name = "bumpalo" version = "3.20.2" @@ -255,6 +264,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "const-oid" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" + [[package]] name = "cpp_demangle" version = "0.4.5" @@ -273,6 +288,15 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "cranelift-assembler-x64" version = "0.130.1" @@ -459,6 +483,15 @@ dependencies = [ "typenum", ] +[[package]] +name = "crypto-common" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77727bb15fa921304124b128af125e7e3b968275d1b108b379190264f4423710" +dependencies = [ + "hybrid-array", +] + [[package]] name = "debugid" version = "0.8.0" @@ -474,8 +507,19 @@ version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ - "block-buffer", - "crypto-common", + "block-buffer 0.10.4", + "crypto-common 0.1.7", +] + +[[package]] +name = "digest" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4850db49bf08e663084f7fb5c87d202ef91a3907271aff24a94eb97ff039153c" +dependencies = [ + "block-buffer 0.12.0", + "const-oid", + "crypto-common 0.2.1", ] [[package]] @@ -773,6 +817,15 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "hybrid-array" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3944cf8cf766b40e2a1a333ee5e9b563f854d5fa49d6a8ca2764e97c6eddb214" +dependencies = [ + "typenum", +] + [[package]] name = "id-arena" version = "2.3.0" @@ -1422,6 +1475,17 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "sha1" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aacc4cc499359472b4abe1bf11d0b12e688af9a805fa5e3016f9a386dc2d0214" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.2", +] + [[package]] name = "sha2" version = "0.10.9" @@ -1429,8 +1493,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", - "cpufeatures", - "digest", + "cpufeatures 0.2.17", + "digest 0.10.7", +] + +[[package]] +name = "sha2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.2", ] [[package]] @@ -1682,6 +1757,8 @@ dependencies = [ "anyhow", "insta", "proptest", + "sha1", + "sha2 0.11.0", "thiserror 2.0.18", "wasm-encoder 0.246.2", "wasmparser 0.246.2", @@ -2022,7 +2099,7 @@ dependencies = [ "semver", "serde", "serde_derive", - "sha2", + "sha2 0.10.9", "smallvec", "target-lexicon", "wasm-encoder 0.245.1", @@ -2045,7 +2122,7 @@ dependencies = [ "rustix", "serde", "serde_derive", - "sha2", + "sha2 0.10.9", "toml", "wasmtime-environ", "windows-sys", diff --git a/Cargo.toml b/Cargo.toml index ed176f5..c6fa37a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,3 +48,5 @@ anyhow = "1" thiserror = "2" proptest = "1" insta = "1" +sha1 = "0.11" +sha2 = "0.11" diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 0b82e91..b1c6773 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -9,8 +9,9 @@ license.workspace = true workspace = true [features] -default = ["native"] +default = ["native", "crypto"] native = ["dep:wasmtime"] +crypto = ["dep:sha1", "dep:sha2"] [dependencies] wasm-encoder = { workspace = true } @@ -18,6 +19,8 @@ wasmparser = { workspace = true } wasmtime = { workspace = true, optional = true } anyhow = { workspace = true } thiserror = { workspace = true } +sha1 = { workspace = true, optional = true } +sha2 = { workspace = true, optional = true } [dev-dependencies] proptest = { workspace = true } diff --git a/crates/core/src/crypto.rs b/crates/core/src/crypto.rs new file mode 100644 index 0000000..f07b113 --- /dev/null +++ b/crates/core/src/crypto.rs @@ -0,0 +1,111 @@ +//! Cryptographic hash primitives for WAFER. +//! +//! Provides a small registry of hash algorithms ([`ALGOS`]) and a Forth +//! word per algorithm. Each word has stack effect: +//! +//! ```text +//! ( c-addr u -- c-addr2 u2 ) +//! ``` +//! +//! It hashes the `u` bytes starting at `c-addr` and writes the digest into +//! the [`crate::memory::HASH_SCRATCH_BASE`] region. The output buffer is +//! shared and overwritten by every subsequent hash call — copy the bytes +//! out before the next invocation if you need to keep them. +//! +//! The registry is designed to grow: add a new entry to [`ALGOS`] and the +//! word is registered automatically by +//! [`crate::outer::ForthVM::register_crypto_words`]. + +use sha1::{Digest as Sha1Digest, Sha1}; +use sha2::{Sha256, Sha512}; + +/// One hash algorithm registered as a Forth word. +pub struct HashAlgo { + /// Forth word name (uppercase by convention). + pub name: &'static str, + /// Digest length in bytes. + pub digest_len: usize, + /// Hash function. + pub hash: fn(&[u8]) -> Vec, +} + +fn sha1_hash(input: &[u8]) -> Vec { + let mut h = Sha1::new(); + h.update(input); + h.finalize().to_vec() +} + +fn sha256_hash(input: &[u8]) -> Vec { + let mut h = Sha256::new(); + h.update(input); + h.finalize().to_vec() +} + +fn sha512_hash(input: &[u8]) -> Vec { + let mut h = Sha512::new(); + h.update(input); + h.finalize().to_vec() +} + +/// All hash algorithms registered as Forth words. +pub const ALGOS: &[HashAlgo] = &[ + HashAlgo { + name: "SHA1", + digest_len: 20, + hash: sha1_hash, + }, + HashAlgo { + name: "SHA256", + digest_len: 32, + hash: sha256_hash, + }, + HashAlgo { + name: "SHA512", + digest_len: 64, + hash: sha512_hash, + }, +]; + +#[cfg(test)] +mod tests { + use super::*; + + fn hex(bytes: &[u8]) -> String { + let mut s = String::with_capacity(bytes.len() * 2); + for b in bytes { + s.push_str(&format!("{b:02x}")); + } + s + } + + #[test] + fn sha1_rfc3174_abc() { + assert_eq!(hex(&sha1_hash(b"abc")), "a9993e364706816aba3e25717850c26c9cd0d89d"); + } + + #[test] + fn sha256_fips180_abc() { + assert_eq!( + hex(&sha256_hash(b"abc")), + "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" + ); + } + + #[test] + fn sha512_fips180_abc() { + assert_eq!( + hex(&sha512_hash(b"abc")), + concat!( + "ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a", + "2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f" + ) + ); + } + + #[test] + fn registry_lengths_match() { + assert_eq!(ALGOS[0].digest_len, sha1_hash(b"").len()); + assert_eq!(ALGOS[1].digest_len, sha256_hash(b"").len()); + assert_eq!(ALGOS[2].digest_len, sha512_hash(b"").len()); + } +} diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 07f81b1..364055e 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -16,6 +16,8 @@ pub mod codegen; pub mod config; +#[cfg(feature = "crypto")] +pub mod crypto; pub mod dictionary; pub mod error; pub mod ir; diff --git a/crates/core/src/memory.rs b/crates/core/src/memory.rs index 9c02585..6e135ec 100644 --- a/crates/core/src/memory.rs +++ b/crates/core/src/memory.rs @@ -59,8 +59,14 @@ pub const FLOAT_STACK_BASE: u32 = RETURN_STACK_BASE + RETURN_STACK_SIZE; // 0x25 /// Size of float stack region. pub const FLOAT_STACK_SIZE: u32 = 2048; // 256 doubles +/// Hash scratch region — output buffer for `SHA1`/`SHA256`/`SHA512` and +/// other hash host words. Sized for the largest supported digest (SHA512 = 64 B). +pub const HASH_SCRATCH_BASE: u32 = FLOAT_STACK_BASE + FLOAT_STACK_SIZE; // 0x2D40 +/// Size of hash scratch region. +pub const HASH_SCRATCH_SIZE: u32 = 128; + /// Dictionary region start. Grows upward. -pub const DICTIONARY_BASE: u32 = FLOAT_STACK_BASE + FLOAT_STACK_SIZE; // 0x2D40 +pub const DICTIONARY_BASE: u32 = HASH_SCRATCH_BASE + HASH_SCRATCH_SIZE; // 0x2DC0 /// Initial top of data stack (grows down from here). pub const DATA_STACK_TOP: u32 = DATA_STACK_BASE + DATA_STACK_SIZE; @@ -113,7 +119,8 @@ mod tests { const { assert!(DATA_STACK_BASE >= PAD_BASE + PAD_SIZE) }; const { assert!(RETURN_STACK_BASE >= DATA_STACK_BASE + DATA_STACK_SIZE) }; const { assert!(FLOAT_STACK_BASE >= RETURN_STACK_BASE + RETURN_STACK_SIZE) }; - const { assert!(DICTIONARY_BASE >= FLOAT_STACK_BASE + FLOAT_STACK_SIZE) }; + const { assert!(HASH_SCRATCH_BASE >= FLOAT_STACK_BASE + FLOAT_STACK_SIZE) }; + const { assert!(DICTIONARY_BASE >= HASH_SCRATCH_BASE + HASH_SCRATCH_SIZE) }; } #[test] diff --git a/crates/core/src/outer.rs b/crates/core/src/outer.rs index bbbbbb9..f94ad92 100644 --- a/crates/core/src/outer.rs +++ b/crates/core/src/outer.rs @@ -20,6 +20,8 @@ use crate::codegen::{CodegenConfig, CompiledModule, compile_consolidated_module, use crate::config::WaferConfig; use crate::dictionary::{Dictionary, DictionaryState, WordId}; use crate::ir::IrOp; +#[cfg(feature = "crypto")] +use crate::memory::HASH_SCRATCH_BASE; use crate::memory::{ CELL_SIZE, DATA_STACK_TOP, FLOAT_SIZE, FLOAT_STACK_BASE, FLOAT_STACK_TOP, INPUT_BUFFER_BASE, INPUT_BUFFER_SIZE, RETURN_STACK_TOP, SYSVAR_BASE_VAR, SYSVAR_HERE, SYSVAR_LEAVE_FLAG, @@ -383,6 +385,12 @@ impl ForthVM { s } + /// Mutable access to the underlying runtime — useful for tests and for + /// host shims that need to read or write WAFER linear memory directly. + pub fn runtime_mut(&mut self) -> &mut R { + &mut self.rt + } + /// Read the current data stack contents (top-first). pub fn data_stack(&mut self) -> Vec { let sp = self.rt.get_dsp(); @@ -2286,7 +2294,10 @@ impl ForthVM { } /// Register a primitive whose implementation is a host function (not IR-compiled). - fn register_host_primitive( + /// + /// Public so downstream crates (like `kelvar-cli`) can extend the VM with + /// their own I/O host words without forking WAFER. + pub fn register_host_primitive( &mut self, name: &str, immediate: bool, @@ -2592,6 +2603,10 @@ impl ForthVM { // -- Floating-Point word set -- self.register_float_words()?; + // -- Crypto: SHA1, SHA256, SHA512 (gated) -- + #[cfg(feature = "crypto")] + self.register_crypto_words()?; + // Batch-compile all deferred IR primitives into a single WASM module self.batch_mode = false; self.batch_compile_deferred()?; @@ -2636,6 +2651,45 @@ impl ForthVM { Ok(()) } + // ----------------------------------------------------------------------- + // Crypto: SHA1 / SHA256 / SHA512 (and any algos in `crypto::ALGOS`) + // ----------------------------------------------------------------------- + + /// Register one Forth word per entry in [`crate::crypto::ALGOS`]. + /// + /// Each word has stack effect `( c-addr u -- c-addr2 u2 )`: it hashes + /// the `u` bytes at `c-addr` and writes the digest into the shared + /// scratch region at [`crate::memory::HASH_SCRATCH_BASE`]. The output + /// is overwritten by every subsequent hash call. + #[cfg(feature = "crypto")] + fn register_crypto_words(&mut self) -> anyhow::Result<()> { + for algo in crate::crypto::ALGOS { + let hash_fn = algo.hash; + let digest_len = algo.digest_len as i32; + let func: HostFn = Box::new(move |ctx: &mut dyn HostAccess| { + // Pop ( c-addr u ) + let dsp = ctx.get_dsp(); + let u = ctx.mem_read_i32(dsp) as u32; + let c_addr = ctx.mem_read_i32(dsp + CELL_SIZE) as u32; + + // Read input bytes and hash. + let bytes = ctx.mem_read_slice(c_addr, u as usize); + let digest = hash_fn(&bytes); + + // Write digest to scratch. + ctx.mem_write_slice(HASH_SCRATCH_BASE, &digest); + + // Push ( scratch-addr digest-len ) — same dsp position, two + // cells overwritten in place. + ctx.mem_write_i32(dsp + CELL_SIZE, HASH_SCRATCH_BASE as i32); + ctx.mem_write_i32(dsp, digest_len); + Ok(()) + }); + self.register_host_primitive(algo.name, false, func)?; + } + Ok(()) + } + // ----------------------------------------------------------------------- // Priority 1: Loop support host functions // ----------------------------------------------------------------------- diff --git a/crates/core/tests/crypto.rs b/crates/core/tests/crypto.rs new file mode 100644 index 0000000..51948d1 --- /dev/null +++ b/crates/core/tests/crypto.rs @@ -0,0 +1,98 @@ +//! End-to-end tests for the `SHA1` / `SHA256` / `SHA512` Forth host words. +//! +//! These run inside a real WAFER VM (NativeRuntime). The Forth program writes +//! a counted string into `PAD`, calls the hash word, then the test reads the +//! digest out of WAFER linear memory and compares it to the RFC-3174 / FIPS-180 +//! reference vectors. + +use wafer_core::memory::{HASH_SCRATCH_BASE, PAD_BASE}; +use wafer_core::outer::ForthVM; +use wafer_core::runtime::Runtime; +use wafer_core::runtime_native::NativeRuntime; + +/// Hash `input` using the named Forth word and return the digest bytes. +fn hash_via_forth(word: &str, input: &[u8]) -> Vec { + let mut vm = ForthVM::::new().expect("vm"); + + // Place input bytes in PAD via a sequence of `c C!` operations. + // Then push (PAD u) and call the hash word. + let mut prog = String::new(); + for (i, b) in input.iter().enumerate() { + prog.push_str(&format!("{} {} C! ", b, PAD_BASE as usize + i)); + } + prog.push_str(&format!("{} {} {} ", PAD_BASE, input.len(), word)); + + vm.evaluate(&prog).expect("eval"); + + // Stack now: ( c-addr2 u2 ). Read u2 then c-addr2 from data stack. + let stack = vm.data_stack(); + assert!(stack.len() >= 2, "expected (addr len) on stack, got {stack:?}"); + let u2 = stack[0] as usize; + let addr2 = stack[1] as u32; + assert_eq!(addr2, HASH_SCRATCH_BASE, "digest should land in HASH_SCRATCH"); + + // Read the digest out of WAFER linear memory. + let mut bytes = Vec::with_capacity(u2); + let rt = vm.runtime_mut(); + for i in 0..u2 { + bytes.push(rt.mem_read_u8(addr2 + i as u32)); + } + bytes +} + +fn hex(bytes: &[u8]) -> String { + let mut s = String::with_capacity(bytes.len() * 2); + for b in bytes { + s.push_str(&format!("{b:02x}")); + } + s +} + +#[test] +fn sha1_abc_vector() { + assert_eq!( + hex(&hash_via_forth("SHA1", b"abc")), + "a9993e364706816aba3e25717850c26c9cd0d89d" + ); +} + +#[test] +fn sha256_abc_vector() { + assert_eq!( + hex(&hash_via_forth("SHA256", b"abc")), + "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" + ); +} + +#[test] +fn sha512_abc_vector() { + assert_eq!( + hex(&hash_via_forth("SHA512", b"abc")), + concat!( + "ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a", + "2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f" + ) + ); +} + +#[test] +fn sha1_empty_string() { + assert_eq!( + hex(&hash_via_forth("SHA1", b"")), + "da39a3ee5e6b4b0d3255bfef95601890afd80709" + ); +} + +#[test] +fn sha1_skey_seed_matches_hel_first_round() { + // Reference: hel/src/skey.rs::tests::encoding_test, first SHA1 of + // "test1" || "my secret" yields [141, 231, 26, 167, ... 5 ints, ...] + // == raw SHA1 of those concatenated bytes. + let input = b"test1my secret"; + let digest = hash_via_forth("SHA1", input); + assert_eq!(digest.len(), 20); + // Folded 20->8 yields ints[0]^ints[2]^ints[4] and ints[1]^ints[3]. + // Just check the raw SHA1 here; the fold is implemented in kelvar Forth. + let expected = "8af6385b5053c32db569166a615739479885c9bc"; + assert_eq!(hex(&digest), expected); +} diff --git a/crates/web/Cargo.toml b/crates/web/Cargo.toml index a1d7f3b..1bb212a 100644 --- a/crates/web/Cargo.toml +++ b/crates/web/Cargo.toml @@ -12,7 +12,7 @@ workspace = true crate-type = ["cdylib", "rlib"] [dependencies] -wafer-core = { path = "../core", version = "0.1.0", default-features = false } +wafer-core = { path = "../core", version = "0.1.0", default-features = false, features = ["crypto"] } wasm-bindgen = "0.2" js-sys = "0.3" anyhow = { workspace = true }