Initial commit: WAFER (WebAssembly Forth Engine in Rust)

Optimizing Forth 2012 compiler targeting WebAssembly with IR-based compilation pipeline, multi-typed stack inference, subroutine threading, and JIT/consolidation modes. Rust kernel with ~35 primitives and Forth standard library for core/core-ext word sets.
2026-03-29 22:14:53 +02:00
commit 683281363d
33 changed files with 5084 additions and 0 deletions
@@ -0,0 +1,21 @@
+//! WASM code generation from IR.
+//!
+//! Translates optimized IR into WASM bytecode using the `wasm-encoder` crate.
+//! Supports two modes:
+//! - **Typed mode**: when type inference succeeds, values stay in WASM locals
+//! - **Fallback mode**: load/store against stack pointer globals in linear memory
+
+// TODO: Step 5 - Full codegen implementation
+// - IR -> WASM function body translation
+// - Single-word module generation (JIT mode)
+// - Multi-word module generation (AOT/consolidation mode)
+// - Typed vs fallback mode selection
+// - Function table management
+
+#[cfg(test)]
+mod tests {
+    // Stand-in test with an empty body: it asserts nothing and only keeps a
+    // test target present until the real codegen tests exist.
+    #[test]
+    fn placeholder() {
+        // Codegen tests will be added in Step 5
+    }
+}
@@ -0,0 +1,21 @@
+//! Forth compile mode: builds IR from word definitions.
+//!
+//! When the outer interpreter encounters `:`, it switches to compile mode.
+//! The compiler collects tokens and builds an IR representation until `;`.
+//! IMMEDIATE words are executed during compilation (e.g., IF, ELSE, THEN).
+
+// TODO: Step 7 - Compiler implementation
+// - : (colon) starts compilation, ; (semicolon) ends it
+// - Build Vec<IrOp> for the word body
+// - Handle IMMEDIATE words
+// - Handle control structures (IF/ELSE/THEN, DO/LOOP, BEGIN/UNTIL)
+// - LITERAL, POSTPONE, ['], [CHAR]
+// - Defining words: VARIABLE, CONSTANT, CREATE, DOES>
+
+#[cfg(test)]
+mod tests {
+    // Stand-in test with an empty body: it asserts nothing and only keeps a
+    // test target present until the real compiler tests exist.
+    #[test]
+    fn placeholder() {
+        // Compiler tests will be added in Step 7
+    }
+}
@@ -0,0 +1,16 @@
+//! Consolidation recompiler: merge all JIT-compiled words into a single WASM module.
+//!
+//! After interactive development, `CONSOLIDATE` recompiles everything:
+//! - All `call_indirect` replaced with direct `call`
+//! - Cross-word optimizations (inlining, constant propagation)
+//! - Single WASM module output for maximum performance
+
+// TODO: Step 12 - Consolidation recompiler implementation
+
+#[cfg(test)]
+mod tests {
+    // Stand-in test with an empty body: it asserts nothing and only keeps a
+    // test target present until the real consolidation tests exist.
+    #[test]
+    fn placeholder() {
+        // Consolidation tests will be added in Step 12
+    }
+}
@@ -0,0 +1,751 @@
+//! Forth dictionary: word headers, lookup, and creation.
+//!
+//! The dictionary is a linked list in linear memory. Each entry contains:
+//! - Link to previous entry (4 bytes)
+//! - Flags + name length (1 byte)
+//! - Name string (N bytes, padded to cell alignment)
+//! - Code field: function table index (4 bytes)
+//! - Parameter field: data for CREATEd words, DOES> action, etc.
+
+use crate::error::{WaferError, WaferResult};
+use crate::memory::{DICTIONARY_BASE, INITIAL_PAGES, PAGE_SIZE};
+
+/// Flags stored in the dictionary entry header.
+///
+/// A single header byte carries both the flag bits (high bits) and the name
+/// length (low five bits). Bit 0x20 is not assigned by any constant here.
+pub mod flags {
+    /// Word executes during compilation.
+    pub const IMMEDIATE: u8 = 0x80;
+    /// Word is hidden (being compiled, not yet findable).
+    pub const HIDDEN: u8 = 0x40;
+    /// Mask for the name length (lower 5 bits).
+    pub const LENGTH_MASK: u8 = 0x1F;
+    /// Maximum word name length.
+    /// Equals the largest value representable under `LENGTH_MASK`.
+    pub const MAX_NAME_LEN: usize = 31;
+}
+
+/// Unique identifier for a word in the dictionary.
+///
+/// Wraps the `u32` function table index that `Dictionary::create` writes
+/// into the word's code field.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub struct WordId(pub u32);
+
+/// The dictionary manages word entries in a simulated linear memory buffer.
+///
+/// Entries form a singly linked list threaded through `memory`: each header
+/// starts with a link to the previously defined word, and a link of 0 ends
+/// the list.
+pub struct Dictionary {
+    /// The memory buffer (simulates WASM linear memory).
+    memory: Vec<u8>,
+    /// Address of the most recently defined word (LATEST).
+    /// 0 until the first word is created.
+    latest: u32,
+    /// Next free address in the dictionary (HERE).
+    here: u32,
+    /// Next available function table index.
+    /// Incremented by one for every word created.
+    next_fn_index: u32,
+}
+
+/// Round `addr` up to the nearest multiple of 4; already-aligned addresses
+/// are returned unchanged.
+fn align4(addr: u32) -> u32 {
+    let misalignment = addr % 4;
+    if misalignment == 0 {
+        addr
+    } else {
+        addr + (4 - misalignment)
+    }
+}
+
+impl Dictionary {
+    /// Create an empty dictionary backed by a freshly allocated, zero-filled
+    /// buffer of `INITIAL_PAGES * PAGE_SIZE` bytes.
+    ///
+    /// HERE starts at `DICTIONARY_BASE`, LATEST at 0 (no word defined yet),
+    /// and the first function table index handed out is 0.
+    pub fn new() -> Self {
+        let byte_len = (INITIAL_PAGES * PAGE_SIZE) as usize;
+        let memory = vec![0u8; byte_len];
+        Self {
+            memory,
+            latest: 0,
+            here: DICTIONARY_BASE,
+            next_fn_index: 0,
+        }
+    }
+
+    /// Append a new word header at HERE (like Forth's CREATE).
+    ///
+    /// The name is folded to ASCII upper case before it is stored. The entry
+    /// is laid out as link (4 bytes), flags/length (1 byte), name bytes, zero
+    /// padding up to the next 4-byte boundary, then the code field (4 bytes).
+    /// The word starts HIDDEN; `reveal` makes it findable.
+    ///
+    /// Returns the `WordId` wrapping the function table index written to the
+    /// code field.
+    ///
+    /// # Errors
+    /// - `WaferError::NameTooLong` if the name is empty or longer than
+    ///   `flags::MAX_NAME_LEN` bytes.
+    /// - `WaferError::DictionaryOverflow` if the entry does not fit in memory.
+    pub fn create(&mut self, name: &str, immediate: bool) -> WaferResult<WordId> {
+        let upper = name.to_ascii_uppercase();
+        let stored = upper.as_bytes();
+        let len = stored.len();
+
+        if !(1..=flags::MAX_NAME_LEN).contains(&len) {
+            return Err(WaferError::NameTooLong(name.to_string()));
+        }
+
+        // Layout: [link:4][flags:1][name:len][pad to 4][code field:4]
+        let link_addr = self.here;
+        let flags_addr = link_addr + 4;
+        let name_addr = flags_addr + 1;
+        let name_end = name_addr + len as u32;
+        let code_addr = align4(name_end);
+        let entry_end = code_addr + 4;
+
+        // Nothing is written unless the whole entry fits.
+        if entry_end as usize > self.memory.len() {
+            return Err(WaferError::DictionaryOverflow);
+        }
+
+        // Link back to the previous LATEST.
+        let previous = self.latest;
+        self.write_u32_unchecked(link_addr, previous);
+
+        // Flags byte: always HIDDEN, optionally IMMEDIATE, plus the length.
+        let immediate_bit = if immediate { flags::IMMEDIATE } else { 0 };
+        self.memory[flags_addr as usize] =
+            flags::HIDDEN | immediate_bit | (len as u8 & flags::LENGTH_MASK);
+
+        // Name bytes followed by zeroed alignment padding.
+        self.memory[name_addr as usize..name_end as usize].copy_from_slice(stored);
+        self.memory[name_end as usize..code_addr as usize].fill(0);
+
+        // The code field receives the next free function table index.
+        let fn_index = self.next_fn_index;
+        self.write_u32_unchecked(code_addr, fn_index);
+        self.next_fn_index += 1;
+
+        // Publish the entry: it becomes LATEST and HERE moves past it.
+        self.latest = link_addr;
+        self.here = entry_end;
+
+        Ok(WordId(fn_index))
+    }
+
+    /// Reveal the most recent word by clearing its HIDDEN flag.
+    /// Called after `: ... ;` completes compilation.
+    ///
+    /// Does nothing when no word has been defined. `latest == 0` is the sole
+    /// "empty dictionary" marker (it is also the end-of-list link that `find`
+    /// stops on), so HERE is deliberately not consulted: HERE moves on
+    /// `allot`/`comma`/`c_comma` even when no word exists, and that must not
+    /// cause the byte at address 4 to be treated as a flags byte.
+    pub fn reveal(&mut self) {
+        if self.latest == 0 {
+            return;
+        }
+        let flags_addr = (self.latest + 4) as usize;
+        if let Some(flag_byte) = self.memory.get_mut(flags_addr) {
+            *flag_byte &= !flags::HIDDEN;
+        }
+    }
+
+    /// Overwrite the code field (function table index) of the word whose
+    /// header starts at `word_addr`.
+    ///
+    /// Best-effort: if the header lookup fails, or the computed code field
+    /// does not lie entirely inside memory, nothing is written.
+    pub fn set_code_field(&mut self, word_addr: u32, fn_index: u32) {
+        if let Ok(code_addr) = self.code_field_addr(word_addr) {
+            // Bounds-checked write: `code_field_addr` only validates the
+            // flags byte, so the code field of a bogus header may lie past
+            // the end of memory. The error is intentionally dropped because
+            // this method has no way to report it.
+            let _ = self.write_u32(code_addr, fn_index);
+        }
+    }
+
+    /// Look up a word by name (ASCII case-insensitive).
+    ///
+    /// Walks the linked list from LATEST toward older entries, skipping
+    /// HIDDEN words, so the most recent visible definition wins.
+    ///
+    /// Returns `(word_address, word_id, is_immediate)` for the first match,
+    /// or `None` if no visible word has that name. The walk also ends with
+    /// `None` if a header lies outside memory or a link does not point to a
+    /// strictly lower address: `create` only ever links to earlier entries,
+    /// so such a link means the list is corrupt and following it could loop
+    /// forever.
+    pub fn find(&self, name: &str) -> Option<(u32, WordId, bool)> {
+        let wanted = name.to_ascii_uppercase();
+        let wanted = wanted.as_bytes();
+
+        // Address 0 is the end-of-list marker.
+        let mut addr = self.latest;
+        while addr != 0 {
+            let flags_at = (addr as usize).checked_add(4)?;
+            let flags_byte = *self.memory.get(flags_at)?;
+
+            if flags_byte & flags::HIDDEN == 0 {
+                let entry_len = (flags_byte & flags::LENGTH_MASK) as usize;
+
+                // Compare lengths first so the name bytes are only touched
+                // for plausible candidates.
+                if entry_len == wanted.len() {
+                    let name_start = flags_at + 1;
+                    let entry_name = self.memory.get(name_start..name_start + entry_len)?;
+
+                    if entry_name == wanted {
+                        let is_immediate = flags_byte & flags::IMMEDIATE != 0;
+                        let code_addr = align4(addr + 5 + entry_len as u32);
+                        let fn_index = self.read_u32(code_addr).ok()?;
+                        return Some((addr, WordId(fn_index), is_immediate));
+                    }
+                }
+            }
+
+            // Follow the link to the previous entry; it must move strictly
+            // toward lower addresses, which rules out cycles of any length.
+            let link = self.read_u32(addr).ok()?;
+            if link >= addr {
+                break;
+            }
+            addr = link;
+        }
+
+        None
+    }
+
+    /// Get the current HERE pointer: the next free dictionary address, as
+    /// advanced by `create`, `allot`, `comma` and `c_comma`.
+    pub fn here(&self) -> u32 {
+        self.here
+    }
+
+    /// Get the current LATEST pointer: the header address of the most
+    /// recently created word, or 0 if no word has been created.
+    pub fn latest(&self) -> u32 {
+        self.latest
+    }
+
+    /// Reserve `n` bytes at HERE (like Forth's ALLOT).
+    ///
+    /// Returns the address of the reserved region, i.e. HERE before the call.
+    /// The reserved bytes are not modified.
+    ///
+    /// # Errors
+    /// `WaferError::DictionaryOverflow` if `here + n` overflows `u32` or
+    /// exceeds the memory size; HERE is left unchanged in that case.
+    pub fn allot(&mut self, n: u32) -> WaferResult<u32> {
+        let start = self.here;
+        match start.checked_add(n) {
+            Some(end) if end as usize <= self.memory.len() => {
+                self.here = end;
+                Ok(start)
+            }
+            _ => Err(WaferError::DictionaryOverflow),
+        }
+    }
+
+    /// Append one little-endian cell (`u32`) at HERE and advance HERE by 4
+    /// (like Forth's `,`).
+    ///
+    /// # Errors
+    /// `WaferError::DictionaryOverflow` if fewer than 4 bytes remain; nothing
+    /// is written and HERE is unchanged.
+    pub fn comma(&mut self, value: u32) -> WaferResult<()> {
+        let cell_addr = self.here;
+        let cell_end = (cell_addr + 4) as usize;
+        if cell_end <= self.memory.len() {
+            self.write_u32_unchecked(cell_addr, value);
+            self.here += 4;
+            Ok(())
+        } else {
+            Err(WaferError::DictionaryOverflow)
+        }
+    }
+
+    /// Append one byte at HERE and advance HERE by 1 (like Forth's `C,`).
+    ///
+    /// # Errors
+    /// `WaferError::DictionaryOverflow` if HERE is already at or past the end
+    /// of memory; nothing is written and HERE is unchanged.
+    pub fn c_comma(&mut self, value: u8) -> WaferResult<()> {
+        let slot = self
+            .memory
+            .get_mut(self.here as usize)
+            .ok_or(WaferError::DictionaryOverflow)?;
+        *slot = value;
+        self.here += 1;
+        Ok(())
+    }
+
+    /// Read a little-endian cell (`u32`) from the given address.
+    ///
+    /// # Errors
+    /// `WaferError::InvalidAddress(addr)` if the four bytes starting at
+    /// `addr` do not all lie inside memory.
+    pub fn read_u32(&self, addr: u32) -> WaferResult<u32> {
+        let start = addr as usize;
+        // `start + 4` can overflow `usize` on 32-bit targets (e.g. wasm32)
+        // for addresses near `u32::MAX`, so the end is computed checked.
+        let end = start
+            .checked_add(4)
+            .ok_or(WaferError::InvalidAddress(addr))?;
+        let cell = self
+            .memory
+            .get(start..end)
+            .ok_or(WaferError::InvalidAddress(addr))?;
+        Ok(u32::from_le_bytes([cell[0], cell[1], cell[2], cell[3]]))
+    }
+
+    /// Write a little-endian cell (`u32`) to the given address.
+    ///
+    /// # Errors
+    /// `WaferError::InvalidAddress(addr)` if the four bytes starting at
+    /// `addr` do not all lie inside memory; nothing is written in that case.
+    pub fn write_u32(&mut self, addr: u32, value: u32) -> WaferResult<()> {
+        let start = addr as usize;
+        // `start + 4` can overflow `usize` on 32-bit targets (e.g. wasm32)
+        // for addresses near `u32::MAX`, so the end is computed checked.
+        let end = start
+            .checked_add(4)
+            .ok_or(WaferError::InvalidAddress(addr))?;
+        let cell = self
+            .memory
+            .get_mut(start..end)
+            .ok_or(WaferError::InvalidAddress(addr))?;
+        cell.copy_from_slice(&value.to_le_bytes());
+        Ok(())
+    }
+
+    /// Read the byte stored at `addr`.
+    ///
+    /// # Errors
+    /// `WaferError::InvalidAddress(addr)` if `addr` is outside memory.
+    pub fn read_u8(&self, addr: u32) -> WaferResult<u8> {
+        match self.memory.get(addr as usize) {
+            Some(&byte) => Ok(byte),
+            None => Err(WaferError::InvalidAddress(addr)),
+        }
+    }
+
+    /// Store `value` in the byte at `addr`.
+    ///
+    /// # Errors
+    /// `WaferError::InvalidAddress(addr)` if `addr` is outside memory.
+    pub fn write_u8(&mut self, addr: u32, value: u8) -> WaferResult<()> {
+        match self.memory.get_mut(addr as usize) {
+            Some(slot) => {
+                *slot = value;
+                Ok(())
+            }
+            None => Err(WaferError::InvalidAddress(addr)),
+        }
+    }
+
+    /// Get the name of the word whose header starts at `word_addr`.
+    ///
+    /// The length comes from the low bits of the header's flags byte. Bytes
+    /// that are not valid UTF-8 are replaced lossily.
+    ///
+    /// # Errors
+    /// `WaferError::InvalidAddress(word_addr)` if the flags byte or any name
+    /// byte lies outside memory, including when `word_addr + 4` would
+    /// overflow `u32`.
+    pub fn word_name(&self, word_addr: u32) -> WaferResult<String> {
+        // Checked: a plain `word_addr + 4` panics (debug) or wraps to a bogus
+        // low address (release) for addresses near `u32::MAX`.
+        let flags_addr = word_addr
+            .checked_add(4)
+            .ok_or(WaferError::InvalidAddress(word_addr))? as usize;
+        let flags_byte = *self
+            .memory
+            .get(flags_addr)
+            .ok_or(WaferError::InvalidAddress(word_addr))?;
+        let name_len = (flags_byte & flags::LENGTH_MASK) as usize;
+        // The name starts right after the flags byte.
+        let name_start = flags_addr + 1;
+        let name_bytes = self
+            .memory
+            .get(name_start..name_start + name_len)
+            .ok_or(WaferError::InvalidAddress(word_addr))?;
+        Ok(String::from_utf8_lossy(name_bytes).into_owned())
+    }
+
+    /// Read the code field (function table index) of the word whose header
+    /// starts at `word_addr`.
+    ///
+    /// # Errors
+    /// `WaferError::InvalidAddress` if the header or its code field cannot be
+    /// read from memory.
+    pub fn code_field(&self, word_addr: u32) -> WaferResult<u32> {
+        self.code_field_addr(word_addr)
+            .and_then(|code_addr| self.read_u32(code_addr))
+    }
+
+    /// Get the parameter field address of the word whose header starts at
+    /// `word_addr`: the address immediately after its 4-byte code field.
+    ///
+    /// # Errors
+    /// `WaferError::InvalidAddress` if the header's flags byte is outside
+    /// memory.
+    pub fn param_field_addr(&self, word_addr: u32) -> WaferResult<u32> {
+        self.code_field_addr(word_addr)
+            .map(|code_addr| code_addr + 4)
+    }
+
+    /// Toggle the IMMEDIATE flag on the most recent word.
+    ///
+    /// `latest == 0` is the sole "no word defined" marker; HERE is not
+    /// consulted because it also moves on `allot`/`comma`/`c_comma`, and an
+    /// empty dictionary must never have the byte at address 4 flipped.
+    ///
+    /// # Errors
+    /// - `WaferError::CompileError` if no word has been defined yet.
+    /// - `WaferError::InvalidAddress` if the flags byte of LATEST lies
+    ///   outside memory.
+    pub fn toggle_immediate(&mut self) -> WaferResult<()> {
+        if self.latest == 0 {
+            return Err(WaferError::CompileError(
+                "no word defined yet".to_string(),
+            ));
+        }
+        let flags_addr = self.latest + 4;
+        let flag_byte = self
+            .memory
+            .get_mut(flags_addr as usize)
+            .ok_or(WaferError::InvalidAddress(flags_addr))?;
+        *flag_byte ^= flags::IMMEDIATE;
+        Ok(())
+    }
+
+    /// Get a reference to the raw memory buffer.
+    /// The slice covers the whole buffer, not just the dictionary region.
+    pub fn memory(&self) -> &[u8] {
+        &self.memory
+    }
+
+    /// Get a mutable reference to the raw memory buffer.
+    ///
+    /// The `Vec` itself is exposed, so callers can overwrite any byte and
+    /// also change the buffer's length; this method performs no validation
+    /// of HERE or LATEST against such changes.
+    pub fn memory_mut(&mut self) -> &mut Vec<u8> {
+        &mut self.memory
+    }
+
+    // -- Private helpers --
+
+    /// Compute the address of the code field for the word at `word_addr`.
+    ///
+    /// The code field sits at the first 4-byte boundary at or after the end
+    /// of the name, whose length is read from the header's flags byte.
+    ///
+    /// # Errors
+    /// `WaferError::InvalidAddress(word_addr)` if the flags byte lies outside
+    /// memory or the header arithmetic would overflow `u32`. The code field
+    /// itself is not checked to be inside memory.
+    fn code_field_addr(&self, word_addr: u32) -> WaferResult<u32> {
+        // Checked: a plain `word_addr + 4` panics (debug) or wraps to a bogus
+        // low address (release) for addresses near `u32::MAX`.
+        let flags_byte = word_addr
+            .checked_add(4)
+            .and_then(|flags_addr| self.memory.get(flags_addr as usize))
+            .copied()
+            .ok_or(WaferError::InvalidAddress(word_addr))?;
+        let name_len = u32::from(flags_byte & flags::LENGTH_MASK);
+        // Leave headroom for the `+ 3` that `align4` performs.
+        let name_end = word_addr
+            .checked_add(5 + name_len)
+            .filter(|end| *end <= u32::MAX - 3)
+            .ok_or(WaferError::InvalidAddress(word_addr))?;
+        Ok(align4(name_end))
+    }
+
+    /// Write a `u32` in little-endian order without returning a `Result`.
+    /// Caller must ensure addr + 4 <= memory.len(); the slice access panics
+    /// otherwise.
+    fn write_u32_unchecked(&mut self, addr: u32, value: u32) {
+        let start = addr as usize;
+        self.memory[start..start + 4].copy_from_slice(&value.to_le_bytes());
+    }
+
+    /// Read a little-endian `u32` without returning a `Result`.
+    /// Caller must ensure addr + 4 <= memory.len(); the slice access panics
+    /// otherwise.
+    fn read_u32_unchecked(&self, addr: u32) -> u32 {
+        let start = addr as usize;
+        let mut cell = [0u8; 4];
+        cell.copy_from_slice(&self.memory[start..start + 4]);
+        u32::from_le_bytes(cell)
+    }
+}
+
+/// `Dictionary::default()` is the same as `Dictionary::new()`.
+impl Default for Dictionary {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    // Unit tests for `Dictionary`. As a child module these tests can reach
+    // private items such as `align4` and the `here` field directly.
+    use super::*;
+    use crate::memory::DICTIONARY_BASE;
+
+    #[test]
+    fn flag_constants() {
+        // Flags should not overlap with name length
+        assert_eq!(flags::IMMEDIATE & flags::LENGTH_MASK, 0);
+        assert_eq!(flags::HIDDEN & flags::LENGTH_MASK, 0);
+        // Max name length fits in the length mask
+        assert!(flags::MAX_NAME_LEN <= flags::LENGTH_MASK as usize);
+    }
+
+    #[test]
+    fn create_and_find_word() {
+        let mut dict = Dictionary::new();
+        let word_id = dict.create("dup", false).unwrap();
+        dict.reveal();
+
+        let result = dict.find("DUP");
+        assert!(result.is_some());
+        let (addr, found_id, is_imm) = result.unwrap();
+        assert_eq!(found_id, word_id);
+        assert!(!is_imm);
+        // The first word's header starts exactly at the dictionary base.
+        assert_eq!(addr, DICTIONARY_BASE);
+    }
+
+    #[test]
+    fn create_multiple_words_and_find_each() {
+        let mut dict = Dictionary::new();
+
+        let id_a = dict.create("ALPHA", false).unwrap();
+        dict.reveal();
+        let id_b = dict.create("BETA", false).unwrap();
+        dict.reveal();
+        let id_c = dict.create("GAMMA", false).unwrap();
+        dict.reveal();
+
+        let (_, fid_a, _) = dict.find("ALPHA").unwrap();
+        let (_, fid_b, _) = dict.find("BETA").unwrap();
+        let (_, fid_c, _) = dict.find("GAMMA").unwrap();
+
+        assert_eq!(fid_a, id_a);
+        assert_eq!(fid_b, id_b);
+        assert_eq!(fid_c, id_c);
+    }
+
+    #[test]
+    fn case_insensitive_lookup() {
+        let mut dict = Dictionary::new();
+        dict.create("Hello", false).unwrap();
+        dict.reveal();
+
+        // All case variants should find the same word
+        assert!(dict.find("HELLO").is_some());
+        assert!(dict.find("hello").is_some());
+        assert!(dict.find("hElLo").is_some());
+    }
+
+    #[test]
+    fn hidden_words_not_found() {
+        let mut dict = Dictionary::new();
+        dict.create("SECRET", false).unwrap();
+        // Don't reveal
+
+        assert!(dict.find("SECRET").is_none());
+    }
+
+    #[test]
+    fn reveal_makes_hidden_word_findable() {
+        let mut dict = Dictionary::new();
+        dict.create("HIDDEN", false).unwrap();
+        assert!(dict.find("HIDDEN").is_none());
+
+        dict.reveal();
+        assert!(dict.find("HIDDEN").is_some());
+    }
+
+    #[test]
+    fn immediate_flag_works() {
+        let mut dict = Dictionary::new();
+        let word_id = dict.create("IF", true).unwrap();
+        dict.reveal();
+
+        let (_, found_id, is_imm) = dict.find("IF").unwrap();
+        assert_eq!(found_id, word_id);
+        assert!(is_imm);
+    }
+
+    #[test]
+    fn toggle_immediate() {
+        let mut dict = Dictionary::new();
+        dict.create("MYWORD", false).unwrap();
+        dict.reveal();
+
+        // Initially not immediate
+        let (_, _, is_imm) = dict.find("MYWORD").unwrap();
+        assert!(!is_imm);
+
+        // Toggle to immediate
+        dict.toggle_immediate().unwrap();
+        let (_, _, is_imm) = dict.find("MYWORD").unwrap();
+        assert!(is_imm);
+
+        // Toggle back
+        dict.toggle_immediate().unwrap();
+        let (_, _, is_imm) = dict.find("MYWORD").unwrap();
+        assert!(!is_imm);
+    }
+
+    #[test]
+    fn comma_advances_here() {
+        let mut dict = Dictionary::new();
+        let h0 = dict.here();
+        dict.comma(42).unwrap();
+        assert_eq!(dict.here(), h0 + 4);
+
+        // Verify the value was stored
+        let val = dict.read_u32(h0).unwrap();
+        assert_eq!(val, 42);
+    }
+
+    #[test]
+    fn c_comma_advances_here() {
+        let mut dict = Dictionary::new();
+        let h0 = dict.here();
+        dict.c_comma(0xAB).unwrap();
+        assert_eq!(dict.here(), h0 + 1);
+
+        // Verify the value was stored
+        let val = dict.read_u8(h0).unwrap();
+        assert_eq!(val, 0xAB);
+    }
+
+    #[test]
+    fn allot_advances_here() {
+        let mut dict = Dictionary::new();
+        let h0 = dict.here();
+        let old = dict.allot(100).unwrap();
+        assert_eq!(old, h0);
+        assert_eq!(dict.here(), h0 + 100);
+    }
+
+    #[test]
+    fn memory_read_write_u32() {
+        let mut dict = Dictionary::new();
+        let addr = DICTIONARY_BASE;
+        dict.write_u32(addr, 0xDEADBEEF).unwrap();
+        let val = dict.read_u32(addr).unwrap();
+        assert_eq!(val, 0xDEADBEEF);
+    }
+
+    #[test]
+    fn memory_read_write_u8() {
+        let mut dict = Dictionary::new();
+        let addr = DICTIONARY_BASE;
+        dict.write_u8(addr, 0x42).unwrap();
+        let val = dict.read_u8(addr).unwrap();
+        assert_eq!(val, 0x42);
+    }
+
+    #[test]
+    fn max_name_length() {
+        let mut dict = Dictionary::new();
+        let name = "A".repeat(31); // MAX_NAME_LEN = 31
+        let result = dict.create(&name, false);
+        assert!(result.is_ok());
+        dict.reveal();
+
+        let found = dict.find(&name);
+        assert!(found.is_some());
+        // Only unwraps the result; the tuple fields are not inspected.
+        let (_, _, _) = found.unwrap();
+
+        // Verify the name stored correctly
+        let word_name = dict.word_name(dict.latest()).unwrap();
+        assert_eq!(word_name, name);
+    }
+
+    #[test]
+    fn name_too_long_rejected() {
+        let mut dict = Dictionary::new();
+        let name = "A".repeat(32); // Exceeds MAX_NAME_LEN
+        let result = dict.create(&name, false);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn empty_name_rejected() {
+        let mut dict = Dictionary::new();
+        let result = dict.create("", false);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn unknown_word_returns_none() {
+        let mut dict = Dictionary::new();
+        dict.create("EXISTS", false).unwrap();
+        dict.reveal();
+
+        assert!(dict.find("DOESNOTEXIST").is_none());
+    }
+
+    #[test]
+    fn param_field_addr_calculation() {
+        let mut dict = Dictionary::new();
+        dict.create("VAR", false).unwrap();
+        dict.reveal();
+
+        let word_addr = dict.latest();
+        let pfa = dict.param_field_addr(word_addr).unwrap();
+        let cfa_addr = align4(word_addr + 5 + 3); // "VAR" is 3 bytes
+        assert_eq!(pfa, cfa_addr + 4);
+
+        // HERE should equal the parameter field address right after create
+        assert_eq!(dict.here(), pfa);
+    }
+
+    #[test]
+    fn dictionary_overflow_detection() {
+        let mut dict = Dictionary::new();
+        let mem_size = dict.memory().len() as u32;
+
+        // Try to allot beyond memory
+        let result = dict.allot(mem_size + 1);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn invalid_address_read() {
+        let dict = Dictionary::new();
+        let mem_size = dict.memory().len() as u32;
+
+        // Reading beyond the end should fail
+        assert!(dict.read_u32(mem_size).is_err());
+        assert!(dict.read_u8(mem_size).is_err());
+    }
+
+    #[test]
+    fn invalid_address_write() {
+        let mut dict = Dictionary::new();
+        let mem_size = dict.memory().len() as u32;
+
+        // Writing beyond the end should fail
+        assert!(dict.write_u32(mem_size, 0).is_err());
+        assert!(dict.write_u8(mem_size, 0).is_err());
+    }
+
+    #[test]
+    fn set_code_field_updates_function_index() {
+        let mut dict = Dictionary::new();
+        dict.create("TEST", false).unwrap();
+        dict.reveal();
+
+        let word_addr = dict.latest();
+        dict.set_code_field(word_addr, 999);
+
+        let code = dict.code_field(word_addr).unwrap();
+        assert_eq!(code, 999);
+    }
+
+    #[test]
+    fn word_name_retrieval() {
+        let mut dict = Dictionary::new();
+        dict.create("HELLO", false).unwrap();
+        dict.reveal();
+
+        let name = dict.word_name(dict.latest()).unwrap();
+        assert_eq!(name, "HELLO");
+    }
+
+    #[test]
+    fn linked_list_traversal() {
+        // Verify that the linked list structure is correct
+        let mut dict = Dictionary::new();
+
+        // HERE before each create is where that word's header will start.
+        let addr0 = dict.here();
+        dict.create("FIRST", false).unwrap();
+        dict.reveal();
+        assert_eq!(dict.latest(), addr0);
+
+        let addr1 = dict.here();
+        dict.create("SECOND", false).unwrap();
+        dict.reveal();
+        assert_eq!(dict.latest(), addr1);
+
+        // Second word's link should point to first word
+        let link = dict.read_u32(addr1).unwrap();
+        assert_eq!(link, addr0);
+
+        // First word's link should be 0 (end of list)
+        let link = dict.read_u32(addr0).unwrap();
+        assert_eq!(link, 0);
+    }
+
+    #[test]
+    fn later_definition_shadows_earlier() {
+        let mut dict = Dictionary::new();
+
+        let id1 = dict.create("DUP", false).unwrap();
+        dict.reveal();
+        let id2 = dict.create("DUP", false).unwrap();
+        dict.reveal();
+
+        // find should return the later (most recent) definition
+        let (_, found_id, _) = dict.find("DUP").unwrap();
+        assert_eq!(found_id, id2);
+        assert_ne!(id1, id2);
+    }
+
+    #[test]
+    fn alignment_padding() {
+        let mut dict = Dictionary::new();
+
+        // "AB" is 2 bytes at offset 5 => name_end = base + 4 + 1 + 2 = base + 7
+        // align4(base + 7) should round up properly
+        dict.create("AB", false).unwrap();
+        dict.reveal();
+
+        let word_addr = dict.latest();
+        let pfa = dict.param_field_addr(word_addr).unwrap();
+        // code field should be at align4(word_addr + 5 + 2) = align4(word_addr + 7)
+        let expected_code = align4(word_addr + 7);
+        assert_eq!(pfa, expected_code + 4);
+        // HERE should be 4-byte aligned
+        assert_eq!(dict.here() % 4, 0);
+    }
+
+    #[test]
+    fn memory_access() {
+        let mut dict = Dictionary::new();
+
+        // Test raw memory access
+        let mem = dict.memory();
+        assert_eq!(mem.len(), (INITIAL_PAGES * PAGE_SIZE) as usize);
+
+        // Test mutable access
+        let mem = dict.memory_mut();
+        mem[0] = 0xFF;
+        assert_eq!(dict.memory()[0], 0xFF);
+    }
+
+    #[test]
+    fn default_trait() {
+        let dict = Dictionary::default();
+        assert_eq!(dict.here(), DICTIONARY_BASE);
+        assert_eq!(dict.latest(), 0);
+    }
+
+    #[test]
+    fn comma_overflow() {
+        let mut dict = Dictionary::new();
+        // Move HERE to near the end of memory
+        let mem_size = dict.memory().len() as u32;
+        // Pokes the private field directly instead of allotting up to it.
+        dict.here = mem_size - 2; // Only 2 bytes left
+        let result = dict.comma(42);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn c_comma_overflow() {
+        let mut dict = Dictionary::new();
+        let mem_size = dict.memory().len() as u32;
+        dict.here = mem_size; // No space left
+        let result = dict.c_comma(42);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn word_ids_are_sequential() {
+        let mut dict = Dictionary::new();
+        let id0 = dict.create("A", false).unwrap();
+        dict.reveal();
+        let id1 = dict.create("B", false).unwrap();
+        dict.reveal();
+        let id2 = dict.create("C", false).unwrap();
+        dict.reveal();
+
+        assert_eq!(id0, WordId(0));
+        assert_eq!(id1, WordId(1));
+        assert_eq!(id2, WordId(2));
+    }
+
+    #[test]
+    fn toggle_immediate_no_word_errors() {
+        let mut dict = Dictionary::new();
+        let result = dict.toggle_immediate();
+        assert!(result.is_err());
+    }
+}
@@ -0,0 +1,84 @@
+//! Error types for the WAFER compiler and runtime.
+
+use thiserror::Error;
+
+/// Errors that can occur during WAFER compilation and execution.
+///
+/// The `Display` text of each variant comes from its `#[error(...)]`
+/// attribute; `{0}` is replaced by the variant's payload.
+#[derive(Debug, Error)]
+pub enum WaferError {
+    #[error("stack underflow")]
+    StackUnderflow,
+
+    #[error("stack overflow")]
+    StackOverflow,
+
+    #[error("return stack underflow")]
+    ReturnStackUnderflow,
+
+    #[error("return stack overflow")]
+    ReturnStackOverflow,
+
+    #[error("float stack underflow")]
+    FloatStackUnderflow,
+
+    #[error("float stack overflow")]
+    FloatStackOverflow,
+
+    // Payload is printed after "unknown word: ".
+    #[error("unknown word: {0}")]
+    UnknownWord(String),
+
+    #[error("division by zero")]
+    DivisionByZero,
+
+    // Payload is the offending address, printed in hexadecimal with a `0x`
+    // prefix.
+    #[error("invalid memory address: {0:#x}")]
+    InvalidAddress(u32),
+
+    #[error("dictionary overflow")]
+    DictionaryOverflow,
+
+    #[error("compilation error: {0}")]
+    CompileError(String),
+
+    #[error("invalid number: {0}")]
+    InvalidNumber(String),
+
+    // NOTE(review): the dictionary also returns this variant for an empty
+    // name, which the message does not describe; confirm that is intended.
+    #[error("word name too long: {0}")]
+    NameTooLong(String),
+
+    #[error("control structure mismatch: {0}")]
+    ControlMismatch(String),
+
+    #[error("WASM codegen error: {0}")]
+    CodegenError(String),
+
+    #[error("WASM validation error: {0}")]
+    ValidationError(String),
+
+    // Carries a message string rather than wrapping `std::io::Error`.
+    #[error("I/O error: {0}")]
+    IoError(String),
+
+    // Payload is the signed code printed after "THROW code ".
+    #[error("THROW code {0}")]
+    Throw(i32),
+
+    // Displays the payload verbatim, with no prefix.
+    #[error("{0}")]
+    Abort(String),
+}
+
+/// Result type alias for WAFER operations.
+/// Shorthand for `Result<T, WaferError>`.
+pub type WaferResult<T> = Result<T, WaferError>;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Checks that a `String` payload is interpolated into the message.
+    #[test]
+    fn error_display() {
+        let err = WaferError::UnknownWord("FOO".to_string());
+        assert_eq!(err.to_string(), "unknown word: FOO");
+    }
+
+    // Checks that a negative THROW code is printed with its sign.
+    #[test]
+    fn error_throw_code() {
+        let err = WaferError::Throw(-1);
+        assert_eq!(err.to_string(), "THROW code -1");
+    }
+}
@@ -0,0 +1,159 @@
+//! Intermediate representation for WAFER's compilation pipeline.
+//!
+//! The IR sits between parsing/compilation and WASM codegen.
+//! Optimization passes transform IR before it reaches codegen.
+
+use crate::dictionary::WordId;
+
+/// A single IR operation.
+///
+/// Control-flow variants own their nested bodies as `Vec<IrOp>`, so a word
+/// body is a tree rather than a flat instruction list.
+#[derive(Debug, Clone, PartialEq)]
+pub enum IrOp {
+    // -- Literals --
+    /// Push a 32-bit integer constant.
+    PushI32(i32),
+    /// Push a 64-bit integer constant (double-cell).
+    PushI64(i64),
+    /// Push a 64-bit float constant.
+    PushF64(f64),
+
+    // -- Stack manipulation --
+    // NOTE(review): no stack effects are documented for these variants; the
+    // names suggest the Forth words DROP DUP SWAP OVER ROT NIP TUCK. Confirm
+    // against codegen once it exists.
+    Drop,
+    Dup,
+    Swap,
+    Over,
+    Rot,
+    Nip,
+    Tuck,
+
+    // -- Arithmetic --
+    Add,
+    Sub,
+    Mul,
+    /// Combined division and modulus: ( n1 n2 -- rem quot )
+    DivMod,
+    Negate,
+    Abs,
+
+    // -- Comparison --
+    // NOTE(review): only `LtUnsigned` is marked unsigned by name; presumably
+    // `Lt`/`Gt` are signed comparisons. Confirm.
+    Eq,
+    NotEq,
+    Lt,
+    Gt,
+    LtUnsigned,
+    ZeroEq,
+    ZeroLt,
+
+    // -- Logic --
+    And,
+    Or,
+    Xor,
+    Invert,
+    Lshift,
+    Rshift,
+
+    // -- Memory --
+    /// Fetch cell from address: ( addr -- x )
+    Fetch,
+    /// Store cell to address: ( x addr -- )
+    Store,
+    /// Fetch byte: ( addr -- char )
+    CFetch,
+    /// Store byte: ( char addr -- )
+    CStore,
+    /// Add to cell at address: ( n addr -- )
+    PlusStore,
+
+    // -- Control flow --
+    /// Call another word.
+    Call(WordId),
+    /// Tail-call optimization.
+    TailCall(WordId),
+    /// IF ... ELSE ... THEN
+    ///
+    /// `else_body` is `None` when the source has no ELSE branch.
+    If {
+        then_body: Vec<IrOp>,
+        else_body: Option<Vec<IrOp>>,
+    },
+    /// DO ... LOOP
+    ///
+    /// NOTE(review): `is_plus_loop` presumably selects `+LOOP` over `LOOP`;
+    /// confirm.
+    DoLoop {
+        body: Vec<IrOp>,
+        is_plus_loop: bool,
+    },
+    /// BEGIN ... UNTIL
+    BeginUntil {
+        body: Vec<IrOp>,
+    },
+    /// BEGIN ... WHILE ... REPEAT
+    ///
+    /// NOTE(review): presumably `test` is the code between BEGIN and WHILE
+    /// and `body` the code between WHILE and REPEAT; confirm.
+    BeginWhileRepeat {
+        test: Vec<IrOp>,
+        body: Vec<IrOp>,
+    },
+    /// Return from current word.
+    Exit,
+
+    // -- Return stack --
+    /// Move to return stack: ( x -- ) ( R: -- x )
+    ToR,
+    /// Move from return stack: ( -- x ) ( R: x -- )
+    FromR,
+    /// Copy from return stack: ( -- x ) ( R: x -- x )
+    RFetch,
+
+    // -- I/O --
+    /// Output character: ( char -- )
+    Emit,
+    /// Print number: ( n -- )
+    Dot,
+    /// Output newline.
+    Cr,
+    /// Output string: ( c-addr u -- )
+    Type,
+
+    // -- System --
+    /// Execute word by function table index: ( xt -- )
+    Execute,
+}
+
+/// A compiled word definition as IR.
+///
+/// Unlike `IrOp`, this type derives only `Debug` and `Clone`, so two
+/// `IrWord` values cannot be compared with `==`.
+#[derive(Debug, Clone)]
+pub struct IrWord {
+    /// Word name.
+    pub name: String,
+    /// The word's body as IR operations.
+    pub body: Vec<IrOp>,
+    /// Whether this word has the IMMEDIATE flag.
+    pub is_immediate: bool,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Builds a flat two-op body and checks the fields round-trip.
+    #[test]
+    fn ir_word_construction() {
+        let word = IrWord {
+            name: "SQUARE".to_string(),
+            body: vec![IrOp::Dup, IrOp::Mul],
+            is_immediate: false,
+        };
+        assert_eq!(word.name, "SQUARE");
+        assert_eq!(word.body.len(), 2);
+    }
+
+    // Builds a body containing a nested `If`; the nested ops count as part
+    // of the single `If` element, so the top-level length is 3.
+    #[test]
+    fn ir_control_flow() {
+        // : ABS DUP 0< IF NEGATE THEN ;
+        let abs_word = IrWord {
+            name: "ABS".to_string(),
+            body: vec![
+                IrOp::Dup,
+                IrOp::ZeroLt,
+                IrOp::If {
+                    then_body: vec![IrOp::Negate],
+                    else_body: None,
+                },
+            ],
+            is_immediate: false,
+        };
+        assert_eq!(abs_word.body.len(), 3);
+    }
+}
@@ -0,0 +1,30 @@
+//! WAFER Core: WebAssembly Forth Engine in Rust
+//!
+//! This crate provides the core compiler and runtime for WAFER,
+//! an optimizing Forth 2012 compiler targeting WebAssembly.
+//!
+//! # Architecture
+//!
+//! ```text
+//! Forth Source -> Outer Interpreter -> IR -> Type Inference -> Optimize -> WASM Codegen
+//! ```
+//!
+//! The compilation pipeline:
+//! 1. **Outer interpreter** tokenizes input and dispatches to interpret/compile mode
+//! 2. **Compiler** builds an intermediate representation (IR) for each word definition
+//! 3. **Type inference** annotates the IR with stack types
+//! 4. **Optimizer** applies transformation passes (constant folding, inlining, etc.)
+//! 5. **Codegen** translates optimized IR to WASM bytecode via `wasm-encoder`
+
+pub mod codegen;
+pub mod compiler;
+pub mod consolidate;
+pub mod dictionary;
+pub mod error;
+pub mod ir;
+pub mod memory;
+pub mod optimizer;
+pub mod outer;
+pub mod primitives;
+pub mod types;
+pub mod words;
@@ -0,0 +1,134 @@
+//! Linear memory layout and stack operations for WAFER.
+//!
+//! WAFER uses WASM linear memory for the dictionary, return stack,
+//! and as a fallback for the data and float stacks when types are unknown.
+//! When type inference succeeds, values stay in WASM locals/operand stack instead.
+
+/// Bytes in a single WASM linear-memory page (64 KiB).
+pub const PAGE_SIZE: u32 = 64 * 1024;
+
+/// Initial number of memory pages (16 pages = 1 MiB).
+pub const INITIAL_PAGES: u32 = 16;
+
+/// Maximum number of memory pages (256 pages = 16 MiB).
+pub const MAX_PAGES: u32 = 256;
+
+// Memory region layout
+// Every base below is a byte address in linear memory; regions are packed back to back.
+
+/// Start of the system variables region (STATE, BASE, >IN, HLD, etc.).
+pub const SYSVAR_BASE: u32 = 0;
+/// Byte length of the system variables region.
+pub const SYSVAR_SIZE: u32 = 64;
+
+/// Start of the input buffer used for source parsing.
+pub const INPUT_BUFFER_BASE: u32 = SYSVAR_BASE + SYSVAR_SIZE; // 0x0040
+/// Byte length of the input buffer.
+pub const INPUT_BUFFER_SIZE: u32 = 1024;
+
+/// Start of PAD, the scratch area for string formatting.
+pub const PAD_BASE: u32 = INPUT_BUFFER_BASE + INPUT_BUFFER_SIZE; // 0x0440
+/// Byte length of PAD.
+pub const PAD_SIZE: u32 = 256;
+
+/// Start of the data stack region, the fallback used when types are unknown.
+/// The stack itself grows downward from the top of this region.
+pub const DATA_STACK_BASE: u32 = PAD_BASE + PAD_SIZE; // 0x0540
+/// Byte length of the data stack region.
+pub const DATA_STACK_SIZE: u32 = 4 * 1024; // 1024 cells
+
+/// Start of the return stack region; the stack grows downward.
+pub const RETURN_STACK_BASE: u32 = DATA_STACK_BASE + DATA_STACK_SIZE; // 0x1540
+/// Byte length of the return stack region.
+pub const RETURN_STACK_SIZE: u32 = 4 * 1024;
+
+/// Start of the floating-point stack region (fallback); the stack grows downward.
+pub const FLOAT_STACK_BASE: u32 = RETURN_STACK_BASE + RETURN_STACK_SIZE; // 0x2540
+/// Byte length of the float stack region.
+pub const FLOAT_STACK_SIZE: u32 = 2 * 1024; // 256 doubles
+
+/// First address of the dictionary, which grows upward from here.
+pub const DICTIONARY_BASE: u32 = FLOAT_STACK_BASE + FLOAT_STACK_SIZE; // 0x2D40
+
+/// Initial top of the data stack: the end address of its region; the stack grows down from here.
+pub const DATA_STACK_TOP: u32 = DATA_STACK_BASE + DATA_STACK_SIZE;
+
+/// Initial top of the return stack: the end address of its region; the stack grows down from here.
+pub const RETURN_STACK_TOP: u32 = RETURN_STACK_BASE + RETURN_STACK_SIZE;
+
+/// Initial top of the float stack: the end address of its region; the stack grows down from here.
+pub const FLOAT_STACK_TOP: u32 = FLOAT_STACK_BASE + FLOAT_STACK_SIZE;
+
+/// Bytes per cell (a cell is one i32, so 4).
+pub const CELL_SIZE: u32 = 4;
+
+/// Bytes per double-cell: two cells, 8 bytes.
+pub const DOUBLE_CELL_SIZE: u32 = 2 * CELL_SIZE;
+
+/// Bytes per float (an f64, so 8).
+pub const FLOAT_SIZE: u32 = 8;
+
+// System variables: each occupies one cell, laid out consecutively from SYSVAR_BASE.
+
+/// Address of STATE: 0 while interpreting, -1 (0xFFFFFFFF) while compiling.
+pub const SYSVAR_STATE: u32 = SYSVAR_BASE;
+/// Address of BASE: the current number base (default 10).
+pub const SYSVAR_BASE_VAR: u32 = SYSVAR_BASE + CELL_SIZE;
+/// Address of >IN: the offset into the input buffer.
+pub const SYSVAR_TO_IN: u32 = SYSVAR_BASE + 2 * CELL_SIZE;
+/// Address of HERE: the next free dictionary address.
+pub const SYSVAR_HERE: u32 = SYSVAR_BASE + 3 * CELL_SIZE;
+/// Address of LATEST: the pointer to the most recent dictionary entry.
+pub const SYSVAR_LATEST: u32 = SYSVAR_BASE + 4 * CELL_SIZE;
+/// Address of SOURCE-ID: the current input source (0 = user input, -1 = string).
+pub const SYSVAR_SOURCE_ID: u32 = SYSVAR_BASE + 5 * CELL_SIZE;
+/// Address of #TIB: the length of the current input.
+pub const SYSVAR_NUM_TIB: u32 = SYSVAR_BASE + 6 * CELL_SIZE;
+/// Address of HLD: the pointer used for pictured numeric output.
+pub const SYSVAR_HLD: u32 = SYSVAR_BASE + 7 * CELL_SIZE;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn memory_regions_dont_overlap() {
+        // Walking up the address space, each region must end at or before the next one begins.
+        assert!(SYSVAR_BASE + SYSVAR_SIZE <= INPUT_BUFFER_BASE);
+        assert!(INPUT_BUFFER_BASE + INPUT_BUFFER_SIZE <= PAD_BASE);
+        assert!(PAD_BASE + PAD_SIZE <= DATA_STACK_BASE);
+        assert!(DATA_STACK_BASE + DATA_STACK_SIZE <= RETURN_STACK_BASE);
+        assert!(RETURN_STACK_BASE + RETURN_STACK_SIZE <= FLOAT_STACK_BASE);
+        assert!(FLOAT_STACK_BASE + FLOAT_STACK_SIZE <= DICTIONARY_BASE);
+    }
+
+    #[test]
+    fn dictionary_starts_within_first_page() {
+        assert!(PAGE_SIZE > DICTIONARY_BASE);
+    }
+
+    #[test]
+    fn stack_tops_are_correct() {
+        assert_eq!(DATA_STACK_BASE + DATA_STACK_SIZE, DATA_STACK_TOP);
+        assert_eq!(RETURN_STACK_BASE + RETURN_STACK_SIZE, RETURN_STACK_TOP);
+        assert_eq!(FLOAT_STACK_BASE + FLOAT_STACK_SIZE, FLOAT_STACK_TOP);
+    }
+
+    #[test]
+    fn sysvar_offsets_are_within_region() {
+        let region_end = SYSVAR_BASE + SYSVAR_SIZE;
+        for cell_addr in [
+            SYSVAR_STATE,
+            SYSVAR_BASE_VAR,
+            SYSVAR_TO_IN,
+            SYSVAR_HERE,
+            SYSVAR_LATEST,
+            SYSVAR_SOURCE_ID,
+            SYSVAR_NUM_TIB,
+            SYSVAR_HLD,
+        ] {
+            assert!(cell_addr >= SYSVAR_BASE);
+            assert!(cell_addr + CELL_SIZE <= region_end);
+        }
+    }
+}
@@ -0,0 +1,19 @@
+//! Optimization passes for WAFER's IR.
+//!
+//! Each pass is a function `Vec<IrOp> -> Vec<IrOp>`, composable in sequence:
+//! 1. Constant folding
+//! 2. Strength reduction
+//! 3. Peephole optimization
+//! 4. Inlining
+//! 5. Dead code elimination
+//! 6. Stack-to-local promotion
+
+// TODO: Step 11 - Optimization pass implementations
+
+#[cfg(test)]
+mod tests {
+    #[test]
+    fn placeholder() {
+        // Intentionally empty; optimizer tests are planned for Step 11.
+    }
+}
@@ -0,0 +1,24 @@
+//! Outer interpreter: tokenizer, number parser, and interpret/compile dispatch.
+//!
+//! The outer interpreter is the main loop of Forth:
+//! 1. Read a token (whitespace-delimited word)
+//! 2. Look it up in the dictionary
+//! 3. If found: execute (interpret mode) or compile (compile mode)
+//! 4. If not found: try to parse as a number
+//! 5. If number: push it (interpret mode) or compile it as a literal (compile mode)
+//! 6. If neither: error
+
+// TODO: Step 8 - Outer interpreter implementation
+// - Tokenizer (whitespace splitting, string literals)
+// - Number parsing (decimal, #decimal, $hex, %binary per Forth 2012)
+// - Main interpret/compile dispatch loop
+// - STATE management
+// - EVALUATE support (nested interpretation)
+
+#[cfg(test)]
+mod tests {
+    #[test]
+    fn placeholder() {
+        // Intentionally empty; outer interpreter tests are planned for Step 8.
+    }
+}
@@ -0,0 +1,19 @@
+//! Built-in primitive words for WAFER.
+//!
+//! Primitives are the ~35 words that must be implemented in Rust because
+//! they require direct WASM instructions or host interaction.
+//! Everything else is defined in Forth (loaded from .fth files).
+
+// TODO: Step 6 - Primitive word implementations
+// Each primitive provides:
+// - Its StackEffect (type signature)
+// - Its IR representation (for inlining by the optimizer)
+// - Direct WASM instruction generation
+
+#[cfg(test)]
+mod tests {
+    #[test]
+    fn placeholder() {
+        // Intentionally empty; primitive tests are planned for Step 6.
+    }
+}
@@ -0,0 +1,106 @@
+//! Type inference engine for WAFER's multi-typed stack.
+//!
+//! WAFER uses type inference to determine when values on the stack have
+//! statically known types. When types are known, codegen uses WASM's native
+//! typed operand stack and locals instead of simulating stacks in linear memory.
+
+/// Static type tag for a value on WAFER's stack; `wasm_type` gives its WASM value type.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum StackType {
+    /// 32-bit integer, the default Forth cell. Maps to WASM i32.
+    I32,
+    /// 64-bit integer (double-cell). Maps to WASM i64.
+    I64,
+    /// 32-bit float. Maps to WASM f32.
+    F32,
+    /// 64-bit float (Forth floating-point). Maps to WASM f64.
+    F64,
+    /// Boolean (result of comparisons). Represented as i32 at WASM level.
+    Bool,
+    /// Memory address. Represented as i32 at WASM level.
+    Addr,
+    /// Type is unknown or cannot be determined statically; `wasm_type` falls back to i32.
+    Unknown,
+}
+
+impl StackType {
+    /// WASM value type that carries this stack type; `Unknown` falls back to i32.
+    pub fn wasm_type(self) -> wasm_encoder::ValType {
+        use wasm_encoder::ValType;
+        match self {
+            Self::I64 => ValType::I64,
+            Self::F32 => ValType::F32,
+            Self::F64 => ValType::F64,
+            Self::I32 | Self::Bool | Self::Addr | Self::Unknown => ValType::I32,
+        }
+    }
+
+    /// Reports whether this type is represented as an i32 at the WASM level.
+    pub fn is_i32_compatible(self) -> bool {
+        match self {
+            Self::I32 | Self::Bool | Self::Addr | Self::Unknown => true,
+            Self::I64 | Self::F32 | Self::F64 => false,
+        }
+    }
+}
+
+/// Typed stack effect of a Forth word: the types it consumes and the types it produces.
+///
+/// For example, `+` has effect `( I32 I32 -- I32 )`: two inputs, one output.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct StackEffect {
+    /// Types consumed from the stack, listed bottom to top.
+    pub inputs: Vec<StackType>,
+    /// Types produced on the stack, listed bottom to top.
+    pub outputs: Vec<StackType>,
+}
+
+impl StackEffect {
+    /// Builds a stack effect from the consumed and produced type lists.
+    pub fn new(inputs: Vec<StackType>, outputs: Vec<StackType>) -> Self {
+        StackEffect { inputs, outputs }
+    }
+
+    /// How many stack items the word consumes.
+    pub fn input_count(&self) -> usize {
+        self.inputs.len()
+    }
+
+    /// How many stack items the word produces.
+    pub fn output_count(&self) -> usize {
+        self.outputs.len()
+    }
+
+    /// Net change in stack depth: items produced minus items consumed.
+    pub fn depth_change(&self) -> i32 {
+        self.output_count() as i32 - self.input_count() as i32
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn stack_type_wasm_mapping() {
+        use wasm_encoder::ValType;
+        assert_eq!(StackType::F64.wasm_type(), ValType::F64);
+        for ty in [StackType::I32, StackType::Bool, StackType::Addr] {
+            assert_eq!(ty.wasm_type(), ValType::I32);
+        }
+    }
+
+    #[test]
+    fn stack_effect_depth() {
+        use super::StackType::I32;
+
+        // DUP ( x -- x x )
+        assert_eq!(StackEffect::new(vec![I32], vec![I32, I32]).depth_change(), 1);
+
+        // + ( x y -- z )
+        assert_eq!(StackEffect::new(vec![I32, I32], vec![I32]).depth_change(), -1);
+
+        // DROP ( x -- )
+        assert_eq!(StackEffect::new(vec![I32], vec![]).depth_change(), -1);
+    }
+}
@@ -0,0 +1,19 @@
+//! Forth 2012 word set implementations.
+//!
+//! Each submodule implements one word set from the Forth 2012 standard.
+//! Words are implemented in Rust only when they require direct WASM instructions;
+//! most words are defined in Forth source files under `forth/`.
+
+// Word set modules will be added as each set is implemented:
+// pub mod core;
+// pub mod core_ext;
+// pub mod double;
+// pub mod exception;
+// pub mod floating;
+// pub mod locals;
+// pub mod string;
+// pub mod tools;
+// pub mod memory_alloc;
+// pub mod search_order;
+// pub mod file;
+// pub mod facility;