Add extensible hash primitives: SHA1, SHA256, SHA512

Introduces a `crypto` feature (on by default) that wires the RustCrypto
sha1/sha2 crates into a small `HashAlgo` registry. `register_primitives`
iterates `crypto::ALGOS` and installs one Forth host word per algorithm,
each with the stack effect

    ( c-addr u -- c-addr2 u2 )

reading `u` bytes from `c-addr` and writing the digest into a shared
`HASH_SCRATCH` region in linear memory (carved out between the float
stack and the dictionary).

Adding a new hash is a one-line entry in `ALGOS`. `register_host_primitive`
is now `pub` so downstream crates can extend the VM with their own I/O
host words without forking WAFER — kelvar (a deterministic password
manager on WAFER) is the first consumer.

- 4 unit tests (lib-level sha1/256/512 + registry sanity)
- 5 integration tests (in-VM `SHA1`/`SHA256`/`SHA512` against RFC-3174,
  FIPS-180, and the first-round S/KEY seed used by `hel`)
- All 437 existing lib tests still pass; `wafer-web` still builds for
  `wasm32-unknown-unknown` with the feature enabled
This commit is contained in:
2026-04-14 22:07:00 +02:00
parent 5dccc1ac9e
commit 0fda7e6fe8
9 changed files with 365 additions and 11 deletions
+111
View File
@@ -0,0 +1,111 @@
//! Cryptographic hash primitives for WAFER.
//!
//! Provides a small registry of hash algorithms ([`ALGOS`]) and a Forth
//! word per algorithm. Each word has stack effect:
//!
//! ```text
//! ( c-addr u -- c-addr2 u2 )
//! ```
//!
//! It hashes the `u` bytes starting at `c-addr` and writes the digest into
//! the [`crate::memory::HASH_SCRATCH_BASE`] region. The output buffer is
//! shared and overwritten by every subsequent hash call — copy the bytes
//! out before the next invocation if you need to keep them.
//!
//! The registry is designed to grow: add a new entry to [`ALGOS`] and the
//! word is registered automatically by
//! [`crate::outer::ForthVM::register_crypto_words`].
use sha1::{Digest as Sha1Digest, Sha1};
use sha2::{Sha256, Sha512};
/// One hash algorithm registered as a Forth word.
pub struct HashAlgo {
/// Forth word name (uppercase by convention).
pub name: &'static str,
/// Digest length in bytes.
pub digest_len: usize,
/// Hash function.
pub hash: fn(&[u8]) -> Vec<u8>,
}
fn sha1_hash(input: &[u8]) -> Vec<u8> {
let mut h = Sha1::new();
h.update(input);
h.finalize().to_vec()
}
fn sha256_hash(input: &[u8]) -> Vec<u8> {
let mut h = Sha256::new();
h.update(input);
h.finalize().to_vec()
}
fn sha512_hash(input: &[u8]) -> Vec<u8> {
let mut h = Sha512::new();
h.update(input);
h.finalize().to_vec()
}
/// All hash algorithms registered as Forth words.
pub const ALGOS: &[HashAlgo] = &[
HashAlgo {
name: "SHA1",
digest_len: 20,
hash: sha1_hash,
},
HashAlgo {
name: "SHA256",
digest_len: 32,
hash: sha256_hash,
},
HashAlgo {
name: "SHA512",
digest_len: 64,
hash: sha512_hash,
},
];
#[cfg(test)]
mod tests {
use super::*;
fn hex(bytes: &[u8]) -> String {
let mut s = String::with_capacity(bytes.len() * 2);
for b in bytes {
s.push_str(&format!("{b:02x}"));
}
s
}
#[test]
fn sha1_rfc3174_abc() {
assert_eq!(hex(&sha1_hash(b"abc")), "a9993e364706816aba3e25717850c26c9cd0d89d");
}
#[test]
fn sha256_fips180_abc() {
assert_eq!(
hex(&sha256_hash(b"abc")),
"ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
);
}
#[test]
fn sha512_fips180_abc() {
assert_eq!(
hex(&sha512_hash(b"abc")),
concat!(
"ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a",
"2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f"
)
);
}
#[test]
fn registry_lengths_match() {
assert_eq!(ALGOS[0].digest_len, sha1_hash(b"").len());
assert_eq!(ALGOS[1].digest_len, sha256_hash(b"").len());
assert_eq!(ALGOS[2].digest_len, sha512_hash(b"").len());
}
}
+2
View File
@@ -16,6 +16,8 @@
pub mod codegen;
pub mod config;
#[cfg(feature = "crypto")]
pub mod crypto;
pub mod dictionary;
pub mod error;
pub mod ir;
+9 -2
View File
@@ -59,8 +59,14 @@ pub const FLOAT_STACK_BASE: u32 = RETURN_STACK_BASE + RETURN_STACK_SIZE; // 0x25
/// Size of float stack region.
pub const FLOAT_STACK_SIZE: u32 = 2048; // 256 doubles
/// Hash scratch region — output buffer for `SHA1`/`SHA256`/`SHA512` and
/// other hash host words. Sized for the largest supported digest (SHA512 = 64 B).
pub const HASH_SCRATCH_BASE: u32 = FLOAT_STACK_BASE + FLOAT_STACK_SIZE; // 0x2D40
/// Size of hash scratch region.
pub const HASH_SCRATCH_SIZE: u32 = 128;
/// Dictionary region start. Grows upward.
pub const DICTIONARY_BASE: u32 = FLOAT_STACK_BASE + FLOAT_STACK_SIZE; // 0x2D40
pub const DICTIONARY_BASE: u32 = HASH_SCRATCH_BASE + HASH_SCRATCH_SIZE; // 0x2DC0
/// Initial top of data stack (grows down from here).
pub const DATA_STACK_TOP: u32 = DATA_STACK_BASE + DATA_STACK_SIZE;
@@ -113,7 +119,8 @@ mod tests {
const { assert!(DATA_STACK_BASE >= PAD_BASE + PAD_SIZE) };
const { assert!(RETURN_STACK_BASE >= DATA_STACK_BASE + DATA_STACK_SIZE) };
const { assert!(FLOAT_STACK_BASE >= RETURN_STACK_BASE + RETURN_STACK_SIZE) };
const { assert!(DICTIONARY_BASE >= FLOAT_STACK_BASE + FLOAT_STACK_SIZE) };
const { assert!(HASH_SCRATCH_BASE >= FLOAT_STACK_BASE + FLOAT_STACK_SIZE) };
const { assert!(DICTIONARY_BASE >= HASH_SCRATCH_BASE + HASH_SCRATCH_SIZE) };
}
#[test]
+55 -1
View File
@@ -20,6 +20,8 @@ use crate::codegen::{CodegenConfig, CompiledModule, compile_consolidated_module,
use crate::config::WaferConfig;
use crate::dictionary::{Dictionary, DictionaryState, WordId};
use crate::ir::IrOp;
#[cfg(feature = "crypto")]
use crate::memory::HASH_SCRATCH_BASE;
use crate::memory::{
CELL_SIZE, DATA_STACK_TOP, FLOAT_SIZE, FLOAT_STACK_BASE, FLOAT_STACK_TOP, INPUT_BUFFER_BASE,
INPUT_BUFFER_SIZE, RETURN_STACK_TOP, SYSVAR_BASE_VAR, SYSVAR_HERE, SYSVAR_LEAVE_FLAG,
@@ -383,6 +385,12 @@ impl<R: Runtime> ForthVM<R> {
s
}
/// Mutable access to the underlying runtime — useful for tests and for
/// host shims that need to read or write WAFER linear memory directly.
pub fn runtime_mut(&mut self) -> &mut R {
&mut self.rt
}
/// Read the current data stack contents (top-first).
pub fn data_stack(&mut self) -> Vec<i32> {
let sp = self.rt.get_dsp();
@@ -2286,7 +2294,10 @@ impl<R: Runtime> ForthVM<R> {
}
/// Register a primitive whose implementation is a host function (not IR-compiled).
fn register_host_primitive(
///
/// Public so downstream crates (like `kelvar-cli`) can extend the VM with
/// their own I/O host words without forking WAFER.
pub fn register_host_primitive(
&mut self,
name: &str,
immediate: bool,
@@ -2592,6 +2603,10 @@ impl<R: Runtime> ForthVM<R> {
// -- Floating-Point word set --
self.register_float_words()?;
// -- Crypto: SHA1, SHA256, SHA512 (gated) --
#[cfg(feature = "crypto")]
self.register_crypto_words()?;
// Batch-compile all deferred IR primitives into a single WASM module
self.batch_mode = false;
self.batch_compile_deferred()?;
@@ -2636,6 +2651,45 @@ impl<R: Runtime> ForthVM<R> {
Ok(())
}
// -----------------------------------------------------------------------
// Crypto: SHA1 / SHA256 / SHA512 (and any algos in `crypto::ALGOS`)
// -----------------------------------------------------------------------
/// Register one Forth word per entry in [`crate::crypto::ALGOS`].
///
/// Each word has stack effect `( c-addr u -- c-addr2 u2 )`: it hashes
/// the `u` bytes at `c-addr` and writes the digest into the shared
/// scratch region at [`crate::memory::HASH_SCRATCH_BASE`]. The output
/// is overwritten by every subsequent hash call.
#[cfg(feature = "crypto")]
fn register_crypto_words(&mut self) -> anyhow::Result<()> {
for algo in crate::crypto::ALGOS {
let hash_fn = algo.hash;
let digest_len = algo.digest_len as i32;
let func: HostFn = Box::new(move |ctx: &mut dyn HostAccess| {
// Pop ( c-addr u )
let dsp = ctx.get_dsp();
let u = ctx.mem_read_i32(dsp) as u32;
let c_addr = ctx.mem_read_i32(dsp + CELL_SIZE) as u32;
// Read input bytes and hash.
let bytes = ctx.mem_read_slice(c_addr, u as usize);
let digest = hash_fn(&bytes);
// Write digest to scratch.
ctx.mem_write_slice(HASH_SCRATCH_BASE, &digest);
// Push ( scratch-addr digest-len ) — same dsp position, two
// cells overwritten in place.
ctx.mem_write_i32(dsp + CELL_SIZE, HASH_SCRATCH_BASE as i32);
ctx.mem_write_i32(dsp, digest_len);
Ok(())
});
self.register_host_primitive(algo.name, false, func)?;
}
Ok(())
}
// -----------------------------------------------------------------------
// Priority 1: Loop support host functions
// -----------------------------------------------------------------------