Add extensible hash primitives: SHA1, SHA256, SHA512

Introduces a `crypto` feature (on by default) that wires the RustCrypto
sha1/sha2 crates into a small `HashAlgo` registry. `register_primitives`
iterates `crypto::ALGOS` and installs one Forth host word per algorithm,
each with the stack effect

    ( c-addr u -- c-addr2 u2 )

reading `u` bytes from `c-addr` and writing the digest into a shared
`HASH_SCRATCH` region in linear memory (carved out between the float
stack and the dictionary).

Adding a new hash is a one-line entry in `ALGOS`. `register_host_primitive`
is now `pub` so downstream crates can extend the VM with their own I/O
host words without forking WAFER — kelvar (a deterministic password
manager on WAFER) is the first consumer.

- 4 unit tests (lib-level sha1/256/512 + registry sanity)
- 5 integration tests (in-VM `SHA1`/`SHA256`/`SHA512` against RFC-3174,
  FIPS-180, and the first-round S/KEY seed used by `hel`)
- All 437 existing lib tests still pass; `wafer-web` still builds for
  `wasm32-unknown-unknown` with the feature enabled
This commit is contained in:
2026-04-14 22:07:00 +02:00
parent 5dccc1ac9e
commit 0fda7e6fe8
9 changed files with 365 additions and 11 deletions
+4 -1
View File
@@ -9,8 +9,9 @@ license.workspace = true
workspace = true
[features]
default = ["native"]
default = ["native", "crypto"]
native = ["dep:wasmtime"]
crypto = ["dep:sha1", "dep:sha2"]
[dependencies]
wasm-encoder = { workspace = true }
@@ -18,6 +19,8 @@ wasmparser = { workspace = true }
wasmtime = { workspace = true, optional = true }
anyhow = { workspace = true }
thiserror = { workspace = true }
sha1 = { workspace = true, optional = true }
sha2 = { workspace = true, optional = true }
[dev-dependencies]
proptest = { workspace = true }
+111
View File
@@ -0,0 +1,111 @@
//! Cryptographic hash primitives for WAFER.
//!
//! Provides a small registry of hash algorithms ([`ALGOS`]) and a Forth
//! word per algorithm. Each word has stack effect:
//!
//! ```text
//! ( c-addr u -- c-addr2 u2 )
//! ```
//!
//! It hashes the `u` bytes starting at `c-addr` and writes the digest into
//! the [`crate::memory::HASH_SCRATCH_BASE`] region. The output buffer is
//! shared and overwritten by every subsequent hash call — copy the bytes
//! out before the next invocation if you need to keep them.
//!
//! The registry is designed to grow: add a new entry to [`ALGOS`] and the
//! word is registered automatically by
//! [`crate::outer::ForthVM::register_crypto_words`].
use sha1::{Digest as Sha1Digest, Sha1};
use sha2::{Sha256, Sha512};
/// One hash algorithm registered as a Forth word.
pub struct HashAlgo {
/// Forth word name (uppercase by convention).
pub name: &'static str,
/// Digest length in bytes.
pub digest_len: usize,
/// Hash function.
pub hash: fn(&[u8]) -> Vec<u8>,
}
fn sha1_hash(input: &[u8]) -> Vec<u8> {
let mut h = Sha1::new();
h.update(input);
h.finalize().to_vec()
}
fn sha256_hash(input: &[u8]) -> Vec<u8> {
let mut h = Sha256::new();
h.update(input);
h.finalize().to_vec()
}
fn sha512_hash(input: &[u8]) -> Vec<u8> {
let mut h = Sha512::new();
h.update(input);
h.finalize().to_vec()
}
/// All hash algorithms registered as Forth words.
pub const ALGOS: &[HashAlgo] = &[
HashAlgo {
name: "SHA1",
digest_len: 20,
hash: sha1_hash,
},
HashAlgo {
name: "SHA256",
digest_len: 32,
hash: sha256_hash,
},
HashAlgo {
name: "SHA512",
digest_len: 64,
hash: sha512_hash,
},
];
#[cfg(test)]
mod tests {
use super::*;
fn hex(bytes: &[u8]) -> String {
let mut s = String::with_capacity(bytes.len() * 2);
for b in bytes {
s.push_str(&format!("{b:02x}"));
}
s
}
#[test]
fn sha1_rfc3174_abc() {
assert_eq!(hex(&sha1_hash(b"abc")), "a9993e364706816aba3e25717850c26c9cd0d89d");
}
#[test]
fn sha256_fips180_abc() {
assert_eq!(
hex(&sha256_hash(b"abc")),
"ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
);
}
#[test]
fn sha512_fips180_abc() {
assert_eq!(
hex(&sha512_hash(b"abc")),
concat!(
"ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a",
"2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f"
)
);
}
#[test]
fn registry_lengths_match() {
assert_eq!(ALGOS[0].digest_len, sha1_hash(b"").len());
assert_eq!(ALGOS[1].digest_len, sha256_hash(b"").len());
assert_eq!(ALGOS[2].digest_len, sha512_hash(b"").len());
}
}
+2
View File
@@ -16,6 +16,8 @@
pub mod codegen;
pub mod config;
#[cfg(feature = "crypto")]
pub mod crypto;
pub mod dictionary;
pub mod error;
pub mod ir;
+9 -2
View File
@@ -59,8 +59,14 @@ pub const FLOAT_STACK_BASE: u32 = RETURN_STACK_BASE + RETURN_STACK_SIZE; // 0x25
/// Size of float stack region.
pub const FLOAT_STACK_SIZE: u32 = 2048; // 256 doubles
/// Hash scratch region — output buffer for `SHA1`/`SHA256`/`SHA512` and
/// other hash host words. Sized for the largest supported digest (SHA512 = 64 B).
pub const HASH_SCRATCH_BASE: u32 = FLOAT_STACK_BASE + FLOAT_STACK_SIZE; // 0x2D40
/// Size of hash scratch region.
pub const HASH_SCRATCH_SIZE: u32 = 128;
/// Dictionary region start. Grows upward.
pub const DICTIONARY_BASE: u32 = FLOAT_STACK_BASE + FLOAT_STACK_SIZE; // 0x2D40
pub const DICTIONARY_BASE: u32 = HASH_SCRATCH_BASE + HASH_SCRATCH_SIZE; // 0x2DC0
/// Initial top of data stack (grows down from here).
pub const DATA_STACK_TOP: u32 = DATA_STACK_BASE + DATA_STACK_SIZE;
@@ -113,7 +119,8 @@ mod tests {
const { assert!(DATA_STACK_BASE >= PAD_BASE + PAD_SIZE) };
const { assert!(RETURN_STACK_BASE >= DATA_STACK_BASE + DATA_STACK_SIZE) };
const { assert!(FLOAT_STACK_BASE >= RETURN_STACK_BASE + RETURN_STACK_SIZE) };
const { assert!(DICTIONARY_BASE >= FLOAT_STACK_BASE + FLOAT_STACK_SIZE) };
const { assert!(HASH_SCRATCH_BASE >= FLOAT_STACK_BASE + FLOAT_STACK_SIZE) };
const { assert!(DICTIONARY_BASE >= HASH_SCRATCH_BASE + HASH_SCRATCH_SIZE) };
}
#[test]
+55 -1
View File
@@ -20,6 +20,8 @@ use crate::codegen::{CodegenConfig, CompiledModule, compile_consolidated_module,
use crate::config::WaferConfig;
use crate::dictionary::{Dictionary, DictionaryState, WordId};
use crate::ir::IrOp;
#[cfg(feature = "crypto")]
use crate::memory::HASH_SCRATCH_BASE;
use crate::memory::{
CELL_SIZE, DATA_STACK_TOP, FLOAT_SIZE, FLOAT_STACK_BASE, FLOAT_STACK_TOP, INPUT_BUFFER_BASE,
INPUT_BUFFER_SIZE, RETURN_STACK_TOP, SYSVAR_BASE_VAR, SYSVAR_HERE, SYSVAR_LEAVE_FLAG,
@@ -383,6 +385,12 @@ impl<R: Runtime> ForthVM<R> {
s
}
/// Mutable access to the underlying runtime — useful for tests and for
/// host shims that need to read or write WAFER linear memory directly.
pub fn runtime_mut(&mut self) -> &mut R {
&mut self.rt
}
/// Read the current data stack contents (top-first).
pub fn data_stack(&mut self) -> Vec<i32> {
let sp = self.rt.get_dsp();
@@ -2286,7 +2294,10 @@ impl<R: Runtime> ForthVM<R> {
}
/// Register a primitive whose implementation is a host function (not IR-compiled).
fn register_host_primitive(
///
/// Public so downstream crates (like `kelvar-cli`) can extend the VM with
/// their own I/O host words without forking WAFER.
pub fn register_host_primitive(
&mut self,
name: &str,
immediate: bool,
@@ -2592,6 +2603,10 @@ impl<R: Runtime> ForthVM<R> {
// -- Floating-Point word set --
self.register_float_words()?;
// -- Crypto: SHA1, SHA256, SHA512 (gated) --
#[cfg(feature = "crypto")]
self.register_crypto_words()?;
// Batch-compile all deferred IR primitives into a single WASM module
self.batch_mode = false;
self.batch_compile_deferred()?;
@@ -2636,6 +2651,45 @@ impl<R: Runtime> ForthVM<R> {
Ok(())
}
// -----------------------------------------------------------------------
// Crypto: SHA1 / SHA256 / SHA512 (and any algos in `crypto::ALGOS`)
// -----------------------------------------------------------------------
/// Register one Forth word per entry in [`crate::crypto::ALGOS`].
///
/// Each word has stack effect `( c-addr u -- c-addr2 u2 )`: it hashes
/// the `u` bytes at `c-addr` and writes the digest into the shared
/// scratch region at [`crate::memory::HASH_SCRATCH_BASE`]. The output
/// is overwritten by every subsequent hash call.
#[cfg(feature = "crypto")]
fn register_crypto_words(&mut self) -> anyhow::Result<()> {
for algo in crate::crypto::ALGOS {
let hash_fn = algo.hash;
let digest_len = algo.digest_len as i32;
let func: HostFn = Box::new(move |ctx: &mut dyn HostAccess| {
// Pop ( c-addr u )
let dsp = ctx.get_dsp();
let u = ctx.mem_read_i32(dsp) as u32;
let c_addr = ctx.mem_read_i32(dsp + CELL_SIZE) as u32;
// Read input bytes and hash.
let bytes = ctx.mem_read_slice(c_addr, u as usize);
let digest = hash_fn(&bytes);
// Write digest to scratch.
ctx.mem_write_slice(HASH_SCRATCH_BASE, &digest);
// Push ( scratch-addr digest-len ) — same dsp position, two
// cells overwritten in place.
ctx.mem_write_i32(dsp + CELL_SIZE, HASH_SCRATCH_BASE as i32);
ctx.mem_write_i32(dsp, digest_len);
Ok(())
});
self.register_host_primitive(algo.name, false, func)?;
}
Ok(())
}
// -----------------------------------------------------------------------
// Priority 1: Loop support host functions
// -----------------------------------------------------------------------
+98
View File
@@ -0,0 +1,98 @@
//! End-to-end tests for the `SHA1` / `SHA256` / `SHA512` Forth host words.
//!
//! These run inside a real WAFER VM (NativeRuntime). The Forth program writes
//! a counted string into `PAD`, calls the hash word, then the test reads the
//! digest out of WAFER linear memory and compares it to the RFC-3174 / FIPS-180
//! reference vectors.
use wafer_core::memory::{HASH_SCRATCH_BASE, PAD_BASE};
use wafer_core::outer::ForthVM;
use wafer_core::runtime::Runtime;
use wafer_core::runtime_native::NativeRuntime;
/// Hash `input` using the named Forth word and return the digest bytes.
fn hash_via_forth(word: &str, input: &[u8]) -> Vec<u8> {
let mut vm = ForthVM::<NativeRuntime>::new().expect("vm");
// Place input bytes in PAD via a sequence of `c C!` operations.
// Then push (PAD u) and call the hash word.
let mut prog = String::new();
for (i, b) in input.iter().enumerate() {
prog.push_str(&format!("{} {} C! ", b, PAD_BASE as usize + i));
}
prog.push_str(&format!("{} {} {} ", PAD_BASE, input.len(), word));
vm.evaluate(&prog).expect("eval");
// Stack now: ( c-addr2 u2 ). Read u2 then c-addr2 from data stack.
let stack = vm.data_stack();
assert!(stack.len() >= 2, "expected (addr len) on stack, got {stack:?}");
let u2 = stack[0] as usize;
let addr2 = stack[1] as u32;
assert_eq!(addr2, HASH_SCRATCH_BASE, "digest should land in HASH_SCRATCH");
// Read the digest out of WAFER linear memory.
let mut bytes = Vec::with_capacity(u2);
let rt = vm.runtime_mut();
for i in 0..u2 {
bytes.push(rt.mem_read_u8(addr2 + i as u32));
}
bytes
}
fn hex(bytes: &[u8]) -> String {
let mut s = String::with_capacity(bytes.len() * 2);
for b in bytes {
s.push_str(&format!("{b:02x}"));
}
s
}
#[test]
fn sha1_abc_vector() {
assert_eq!(
hex(&hash_via_forth("SHA1", b"abc")),
"a9993e364706816aba3e25717850c26c9cd0d89d"
);
}
#[test]
fn sha256_abc_vector() {
assert_eq!(
hex(&hash_via_forth("SHA256", b"abc")),
"ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
);
}
#[test]
fn sha512_abc_vector() {
assert_eq!(
hex(&hash_via_forth("SHA512", b"abc")),
concat!(
"ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a",
"2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f"
)
);
}
#[test]
fn sha1_empty_string() {
assert_eq!(
hex(&hash_via_forth("SHA1", b"")),
"da39a3ee5e6b4b0d3255bfef95601890afd80709"
);
}
#[test]
fn sha1_skey_seed_matches_hel_first_round() {
// Reference: hel/src/skey.rs::tests::encoding_test, first SHA1 of
// "test1" || "my secret" yields [141, 231, 26, 167, ... 5 ints, ...]
// == raw SHA1 of those concatenated bytes.
let input = b"test1my secret";
let digest = hash_via_forth("SHA1", input);
assert_eq!(digest.len(), 20);
// Folded 20->8 yields ints[0]^ints[2]^ints[4] and ints[1]^ints[3].
// Just check the raw SHA1 here; the fold is implemented in kelvar Forth.
let expected = "8af6385b5053c32db569166a615739479885c9bc";
assert_eq!(hex(&digest), expected);
}