Implement stack-to-local promotion and consolidation recompiler
Stack-to-local promotion (Phase 1: straight-line code):
- Words with no control flow or calls use WASM locals instead of the memory stack
- Stack manipulation (Swap, Rot, Nip, Tuck, Dup, Drop) emits zero instructions
- ~7x instruction reduction for arithmetic-heavy words like `DUP *`
- Consumed items are pre-loaded from memory; results are written back at exit

Consolidation recompiler (CONSOLIDATE word):
- Recompiles all IR-based words into a single WASM module
- Direct `call` instructions instead of `call_indirect` through the function table
- Cranelift can inline and optimize across word boundaries
- All control flow variants support consolidated calls

342 unit tests + 11 compliance tests, all passing.
This commit is contained in:
@@ -7,6 +7,7 @@
|
||||
//! remains a global.
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use wasm_encoder::{
|
||||
BlockType, CodeSection, ConstExpr, ElementSection, Elements, EntityType, ExportKind,
|
||||
@@ -14,6 +15,7 @@ use wasm_encoder::{
|
||||
MemoryType, Module, RefType, TableType, TypeSection, ValType,
|
||||
};
|
||||
|
||||
use crate::dictionary::WordId;
|
||||
use crate::error::{WaferError, WaferResult};
|
||||
use crate::ir::IrOp;
|
||||
use crate::memory::CELL_SIZE;
|
||||
@@ -954,6 +956,372 @@ pub fn compile_word(
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Consolidated module generation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Emit all IR operations, replacing `Call`/`TailCall` with direct calls
|
||||
/// when the target word is within the consolidated module.
|
||||
fn emit_consolidated_body(f: &mut Function, ops: &[IrOp], local_fn_map: &HashMap<WordId, u32>) {
|
||||
for op in ops {
|
||||
emit_consolidated_op(f, op, local_fn_map);
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit a single IR operation with consolidated call support.
|
||||
///
|
||||
/// For `Call` and `TailCall`, emits a direct `call` if the target is in the
|
||||
/// consolidated module, otherwise falls back to `call_indirect`. For control
|
||||
/// flow with nested bodies, recurses to handle inner calls.
|
||||
fn emit_consolidated_op(f: &mut Function, op: &IrOp, local_fn_map: &HashMap<WordId, u32>) {
|
||||
match op {
|
||||
IrOp::Call(word_id) => {
|
||||
if let Some(&fn_idx) = local_fn_map.get(word_id) {
|
||||
dsp_writeback(f);
|
||||
f.instruction(&Instruction::Call(fn_idx));
|
||||
dsp_reload(f);
|
||||
} else {
|
||||
// Fall back to indirect call for host functions
|
||||
dsp_writeback(f);
|
||||
f.instruction(&Instruction::I32Const(word_id.0 as i32))
|
||||
.instruction(&Instruction::CallIndirect {
|
||||
type_index: TYPE_VOID,
|
||||
table_index: TABLE,
|
||||
});
|
||||
dsp_reload(f);
|
||||
}
|
||||
}
|
||||
|
||||
IrOp::TailCall(word_id) => {
|
||||
if let Some(&fn_idx) = local_fn_map.get(word_id) {
|
||||
dsp_writeback(f);
|
||||
f.instruction(&Instruction::Call(fn_idx));
|
||||
f.instruction(&Instruction::Return);
|
||||
} else {
|
||||
dsp_writeback(f);
|
||||
f.instruction(&Instruction::I32Const(word_id.0 as i32))
|
||||
.instruction(&Instruction::CallIndirect {
|
||||
type_index: TYPE_VOID,
|
||||
table_index: TABLE,
|
||||
});
|
||||
f.instruction(&Instruction::Return);
|
||||
}
|
||||
}
|
||||
|
||||
// Control flow with nested bodies -- recurse for consolidated calls
|
||||
IrOp::If {
|
||||
then_body,
|
||||
else_body,
|
||||
} => {
|
||||
pop(f);
|
||||
f.instruction(&Instruction::If(BlockType::Empty));
|
||||
emit_consolidated_body(f, then_body, local_fn_map);
|
||||
if let Some(eb) = else_body {
|
||||
f.instruction(&Instruction::Else);
|
||||
emit_consolidated_body(f, eb, local_fn_map);
|
||||
}
|
||||
f.instruction(&Instruction::End);
|
||||
}
|
||||
|
||||
IrOp::DoLoop { body, is_plus_loop } => {
|
||||
emit_consolidated_do_loop(f, body, *is_plus_loop, local_fn_map);
|
||||
}
|
||||
|
||||
IrOp::BeginUntil { body } => {
|
||||
f.instruction(&Instruction::Loop(BlockType::Empty));
|
||||
emit_consolidated_body(f, body, local_fn_map);
|
||||
pop(f);
|
||||
f.instruction(&Instruction::I32Eqz)
|
||||
.instruction(&Instruction::BrIf(0))
|
||||
.instruction(&Instruction::End);
|
||||
}
|
||||
|
||||
IrOp::BeginAgain { body } => {
|
||||
f.instruction(&Instruction::Loop(BlockType::Empty));
|
||||
emit_consolidated_body(f, body, local_fn_map);
|
||||
f.instruction(&Instruction::Br(0))
|
||||
.instruction(&Instruction::End);
|
||||
}
|
||||
|
||||
IrOp::BeginWhileRepeat { test, body } => {
|
||||
f.instruction(&Instruction::Block(BlockType::Empty));
|
||||
f.instruction(&Instruction::Loop(BlockType::Empty));
|
||||
emit_consolidated_body(f, test, local_fn_map);
|
||||
pop(f);
|
||||
f.instruction(&Instruction::I32Eqz)
|
||||
.instruction(&Instruction::BrIf(1));
|
||||
emit_consolidated_body(f, body, local_fn_map);
|
||||
f.instruction(&Instruction::Br(0))
|
||||
.instruction(&Instruction::End)
|
||||
.instruction(&Instruction::End);
|
||||
}
|
||||
|
||||
IrOp::BeginDoubleWhileRepeat {
|
||||
outer_test,
|
||||
inner_test,
|
||||
body,
|
||||
after_repeat,
|
||||
else_body,
|
||||
} => {
|
||||
f.instruction(&Instruction::Block(BlockType::Empty)); // $end
|
||||
f.instruction(&Instruction::Block(BlockType::Empty)); // $else
|
||||
f.instruction(&Instruction::Block(BlockType::Empty)); // $after
|
||||
f.instruction(&Instruction::Loop(BlockType::Empty)); // $begin
|
||||
emit_consolidated_body(f, outer_test, local_fn_map);
|
||||
pop(f);
|
||||
f.instruction(&Instruction::I32Eqz)
|
||||
.instruction(&Instruction::BrIf(2)); // to $else
|
||||
emit_consolidated_body(f, inner_test, local_fn_map);
|
||||
pop(f);
|
||||
f.instruction(&Instruction::I32Eqz)
|
||||
.instruction(&Instruction::BrIf(1)); // to $after
|
||||
emit_consolidated_body(f, body, local_fn_map);
|
||||
f.instruction(&Instruction::Br(0)); // back to $begin
|
||||
f.instruction(&Instruction::End); // end loop
|
||||
f.instruction(&Instruction::End); // end $after block
|
||||
emit_consolidated_body(f, after_repeat, local_fn_map);
|
||||
if else_body.is_some() {
|
||||
f.instruction(&Instruction::Br(1)); // skip else, goto $end
|
||||
}
|
||||
f.instruction(&Instruction::End); // end $else block
|
||||
if let Some(eb) = else_body {
|
||||
emit_consolidated_body(f, eb, local_fn_map);
|
||||
}
|
||||
f.instruction(&Instruction::End); // end $end block
|
||||
}
|
||||
|
||||
// All other ops have no nested bodies with calls -- delegate to emit_op
|
||||
other => emit_op(f, other),
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit a DO...LOOP / DO...+LOOP with consolidated call support for the body.
|
||||
fn emit_consolidated_do_loop(
|
||||
f: &mut Function,
|
||||
body: &[IrOp],
|
||||
is_plus_loop: bool,
|
||||
local_fn_map: &HashMap<WordId, u32>,
|
||||
) {
|
||||
// DO ( limit index -- )
|
||||
pop_to(f, SCRATCH_BASE); // index
|
||||
pop_to(f, SCRATCH_BASE + 1); // limit
|
||||
|
||||
// Push limit then index to return stack
|
||||
f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1));
|
||||
rpush_via_local(f, SCRATCH_BASE + 2);
|
||||
f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
|
||||
rpush_via_local(f, SCRATCH_BASE + 2);
|
||||
|
||||
f.instruction(&Instruction::Block(BlockType::Empty));
|
||||
f.instruction(&Instruction::Loop(BlockType::Empty));
|
||||
|
||||
emit_consolidated_body(f, body, local_fn_map);
|
||||
|
||||
// Pop current index from return stack into scratch local
|
||||
rpop(f);
|
||||
|
||||
if is_plus_loop {
|
||||
f.instruction(&Instruction::LocalSet(SCRATCH_BASE));
|
||||
pop_to(f, SCRATCH_BASE + 2); // step from data stack
|
||||
|
||||
rpeek(f);
|
||||
f.instruction(&Instruction::LocalSet(SCRATCH_BASE + 1));
|
||||
|
||||
f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||
.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
|
||||
.instruction(&Instruction::I32Sub)
|
||||
.instruction(&Instruction::LocalSet(SCRATCH_BASE + 3));
|
||||
|
||||
f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||
.instruction(&Instruction::LocalGet(SCRATCH_BASE + 2))
|
||||
.instruction(&Instruction::I32Add)
|
||||
.instruction(&Instruction::LocalSet(SCRATCH_BASE));
|
||||
|
||||
f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
|
||||
rpush_via_local(f, SCRATCH_BASE + 2);
|
||||
|
||||
f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 3))
|
||||
.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||
.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
|
||||
.instruction(&Instruction::I32Sub)
|
||||
.instruction(&Instruction::I32Xor)
|
||||
.instruction(&Instruction::I32Const(0))
|
||||
.instruction(&Instruction::I32LtS)
|
||||
.instruction(&Instruction::BrIf(1))
|
||||
.instruction(&Instruction::Br(0))
|
||||
.instruction(&Instruction::End)
|
||||
.instruction(&Instruction::End);
|
||||
} else {
|
||||
f.instruction(&Instruction::I32Const(1))
|
||||
.instruction(&Instruction::I32Add)
|
||||
.instruction(&Instruction::LocalSet(SCRATCH_BASE));
|
||||
|
||||
rpeek(f);
|
||||
f.instruction(&Instruction::LocalSet(SCRATCH_BASE + 1));
|
||||
|
||||
f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
|
||||
rpush_via_local(f, SCRATCH_BASE + 2);
|
||||
|
||||
f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||
.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
|
||||
.instruction(&Instruction::I32GeS)
|
||||
.instruction(&Instruction::BrIf(1))
|
||||
.instruction(&Instruction::Br(0))
|
||||
.instruction(&Instruction::End)
|
||||
.instruction(&Instruction::End);
|
||||
}
|
||||
|
||||
// Clean up: pop index and limit from return stack
|
||||
rpop(f);
|
||||
f.instruction(&Instruction::Drop);
|
||||
rpop(f);
|
||||
f.instruction(&Instruction::Drop);
|
||||
}
|
||||
|
||||
/// Compile all given words into a single consolidated WASM module.
|
||||
///
|
||||
/// Each word becomes a function in the module. Calls between words within the
|
||||
/// module use direct `call` instructions instead of `call_indirect` through the
|
||||
/// function table, enabling Cranelift to inline and optimize across word
|
||||
/// boundaries.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `words` - Words to consolidate, sorted by `WordId`. Each entry is
|
||||
/// `(WordId, Vec<IrOp>)` containing the word's IR body.
|
||||
/// * `local_fn_map` - Maps each `WordId` in the module to its WASM function
|
||||
/// index (imported functions come first, so defined functions start at 1).
|
||||
/// * `table_size` - Current function table size, used for table import minimum.
|
||||
pub fn compile_consolidated_module(
|
||||
words: &[(WordId, Vec<IrOp>)],
|
||||
local_fn_map: &HashMap<WordId, u32>,
|
||||
table_size: u32,
|
||||
) -> WaferResult<Vec<u8>> {
|
||||
let mut module = Module::new();
|
||||
|
||||
// -- Type section --
|
||||
let mut types = TypeSection::new();
|
||||
types.ty().function([], []); // type 0: () -> ()
|
||||
types.ty().function([ValType::I32], []); // type 1: (i32) -> ()
|
||||
module.section(&types);
|
||||
|
||||
// -- Import section (same as single-word modules) --
|
||||
let mut imports = ImportSection::new();
|
||||
imports.import("env", "emit", EntityType::Function(TYPE_I32));
|
||||
imports.import(
|
||||
"env",
|
||||
"memory",
|
||||
EntityType::Memory(MemoryType {
|
||||
minimum: 1,
|
||||
maximum: None,
|
||||
memory64: false,
|
||||
shared: false,
|
||||
page_size_log2: None,
|
||||
}),
|
||||
);
|
||||
imports.import(
|
||||
"env",
|
||||
"dsp",
|
||||
EntityType::Global(GlobalType {
|
||||
val_type: ValType::I32,
|
||||
mutable: true,
|
||||
shared: false,
|
||||
}),
|
||||
);
|
||||
imports.import(
|
||||
"env",
|
||||
"rsp",
|
||||
EntityType::Global(GlobalType {
|
||||
val_type: ValType::I32,
|
||||
mutable: true,
|
||||
shared: false,
|
||||
}),
|
||||
);
|
||||
imports.import(
|
||||
"env",
|
||||
"fsp",
|
||||
EntityType::Global(GlobalType {
|
||||
val_type: ValType::I32,
|
||||
mutable: true,
|
||||
shared: false,
|
||||
}),
|
||||
);
|
||||
imports.import(
|
||||
"env",
|
||||
"table",
|
||||
EntityType::Table(TableType {
|
||||
element_type: RefType::FUNCREF,
|
||||
minimum: table_size as u64,
|
||||
maximum: None,
|
||||
table64: false,
|
||||
shared: false,
|
||||
}),
|
||||
);
|
||||
module.section(&imports);
|
||||
|
||||
// -- Function section: N functions, all type void --
|
||||
let mut functions = FunctionSection::new();
|
||||
for _ in words {
|
||||
functions.function(TYPE_VOID);
|
||||
}
|
||||
module.section(&functions);
|
||||
|
||||
// -- Export section: export each function as "fn_0", "fn_1", etc. --
|
||||
let mut exports = ExportSection::new();
|
||||
for (i, _) in words.iter().enumerate() {
|
||||
let name = format!("fn_{i}");
|
||||
// +1 because emit is imported function index 0
|
||||
exports.export(&name, ExportKind::Func, (i as u32) + 1);
|
||||
}
|
||||
module.section(&exports);
|
||||
|
||||
// -- Element section: place each function in the table at its WordId slot --
|
||||
// Use a single element section with one active segment per word.
|
||||
let mut elements = ElementSection::new();
|
||||
for (i, (word_id, _)) in words.iter().enumerate() {
|
||||
let offset = ConstExpr::i32_const(word_id.0 as i32);
|
||||
let fn_idx = (i as u32) + 1; // +1 for the emit import
|
||||
let indices = [fn_idx];
|
||||
elements.active(
|
||||
Some(TABLE),
|
||||
&offset,
|
||||
Elements::Functions(Cow::Borrowed(&indices)),
|
||||
);
|
||||
}
|
||||
module.section(&elements);
|
||||
|
||||
// -- Code section: emit each function body --
|
||||
let mut code = CodeSection::new();
|
||||
for (_word_id, body) in words {
|
||||
let num_locals = 1 + count_scratch_locals(body);
|
||||
let mut func = Function::new(vec![(num_locals, ValType::I32)]);
|
||||
|
||||
// Prologue: cache $dsp global into local 0
|
||||
func.instruction(&Instruction::GlobalGet(DSP))
|
||||
.instruction(&Instruction::LocalSet(CACHED_DSP_LOCAL));
|
||||
|
||||
// Body with consolidated call support
|
||||
emit_consolidated_body(&mut func, body, local_fn_map);
|
||||
|
||||
// Epilogue: write cached DSP back to the $dsp global
|
||||
func.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
|
||||
.instruction(&Instruction::GlobalSet(DSP));
|
||||
|
||||
func.instruction(&Instruction::End);
|
||||
code.function(&func);
|
||||
}
|
||||
module.section(&code);
|
||||
|
||||
let bytes = module.finish();
|
||||
|
||||
// Validate
|
||||
wasmparser::validate(&bytes).map_err(|e| {
|
||||
WaferError::ValidationError(format!("Consolidated WASM failed validation: {e}"))
|
||||
})?;
|
||||
|
||||
Ok(bytes)
|
||||
}
|
||||
|
||||
/// Generate the core/bootstrap WASM module.
|
||||
///
|
||||
/// Not yet implemented -- will be built in a future step.
|
||||
|
||||
@@ -2,15 +2,168 @@
|
||||
//!
|
||||
//! After interactive development, `CONSOLIDATE` recompiles everything:
|
||||
//! - All `call_indirect` replaced with direct `call`
|
||||
//! - Cross-word optimizations (inlining, constant propagation)
|
||||
//! - Single WASM module output for maximum performance
|
||||
|
||||
// TODO: Step 12 - Consolidation recompiler implementation
|
||||
//!
|
||||
//! The implementation lives across two places:
|
||||
//! - `codegen::compile_consolidated_module()` generates the multi-function WASM module
|
||||
//! - `outer::ForthVM::consolidate()` orchestrates collection, compilation, and table update
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use crate::codegen::compile_consolidated_module;
    use crate::dictionary::WordId;
    use crate::ir::IrOp;

    /// Build the call map for `words`: the word at position `i` is function
    /// `i + 1`, because function index 0 is the imported `emit`.
    fn fn_map(words: &[(WordId, Vec<IrOp>)]) -> HashMap<WordId, u32> {
        words
            .iter()
            .enumerate()
            .map(|(i, (id, _))| (*id, i as u32 + 1))
            .collect()
    }

    /// Compile `words` into a consolidated module and fail the test, reporting
    /// the codegen error, if compilation or validation does not succeed.
    fn assert_consolidates(words: &[(WordId, Vec<IrOp>)], table_size: u32) {
        let map = fn_map(words);
        if let Err(e) = compile_consolidated_module(words, &map, table_size) {
            panic!("consolidated module should compile and validate: {e}");
        }
    }

    #[test]
    fn consolidated_module_validates_empty() {
        // An empty word list still yields a valid module with no functions
        // (the VM guards against this case at the call site anyway).
        assert_consolidates(&[], 16);
    }

    #[test]
    fn consolidated_module_validates_single_word() {
        assert_consolidates(&[(WordId(1), vec![IrOp::PushI32(42)])], 16);
    }

    #[test]
    fn consolidated_module_validates_multiple_words() {
        let words = vec![
            (WordId(1), vec![IrOp::PushI32(1)]),
            (WordId(2), vec![IrOp::PushI32(2), IrOp::Add]),
            (
                WordId(3),
                vec![IrOp::Call(WordId(1)), IrOp::Call(WordId(2))],
            ),
        ];
        assert_consolidates(&words, 16);
    }

    #[test]
    fn consolidated_module_validates_external_call() {
        // Word 3 calls WordId(99) which is NOT in the module -- should use call_indirect
        let words = vec![(WordId(3), vec![IrOp::Call(WordId(99))])];
        assert_consolidates(&words, 256);
    }

    #[test]
    fn consolidated_module_validates_tail_call() {
        let words = vec![
            (WordId(1), vec![IrOp::PushI32(1)]),
            (WordId(2), vec![IrOp::TailCall(WordId(1))]),
        ];
        assert_consolidates(&words, 16);
    }

    #[test]
    fn consolidated_module_validates_control_flow_with_calls() {
        // IF body contains a call to a consolidated word
        let words = vec![
            (WordId(1), vec![IrOp::PushI32(1)]),
            (
                WordId(2),
                vec![
                    IrOp::PushI32(1),
                    IrOp::If {
                        then_body: vec![IrOp::Call(WordId(1))],
                        else_body: Some(vec![IrOp::PushI32(0)]),
                    },
                ],
            ),
        ];
        assert_consolidates(&words, 16);
    }

    #[test]
    fn consolidated_module_validates_loop_with_calls() {
        // DO LOOP body contains a call to a consolidated word
        let words = vec![
            (WordId(1), vec![IrOp::PushI32(1), IrOp::Add]),
            (
                WordId(2),
                vec![
                    IrOp::PushI32(0),
                    IrOp::PushI32(3),
                    IrOp::PushI32(0),
                    IrOp::DoLoop {
                        body: vec![IrOp::Call(WordId(1))],
                        is_plus_loop: false,
                    },
                ],
            ),
        ];
        assert_consolidates(&words, 16);
    }

    #[test]
    fn consolidated_module_validates_begin_until_with_calls() {
        let words = vec![
            (WordId(1), vec![IrOp::PushI32(1), IrOp::Sub]),
            (
                WordId(2),
                vec![
                    IrOp::PushI32(5),
                    IrOp::BeginUntil {
                        body: vec![IrOp::Call(WordId(1)), IrOp::Dup, IrOp::ZeroEq],
                    },
                ],
            ),
        ];
        assert_consolidates(&words, 16);
    }

    #[test]
    fn consolidated_module_validates_begin_while_repeat_with_calls() {
        let words = vec![
            (WordId(1), vec![IrOp::PushI32(1), IrOp::Sub]),
            (
                WordId(2),
                vec![
                    IrOp::PushI32(3),
                    IrOp::BeginWhileRepeat {
                        test: vec![IrOp::Dup],
                        body: vec![IrOp::Call(WordId(1))],
                    },
                ],
            ),
        ];
        assert_consolidates(&words, 16);
    }
}
|
||||
|
||||
@@ -414,8 +414,8 @@ fn inline(ops: Vec<IrOp>, bodies: &HashMap<WordId, Vec<IrOp>>, max_size: usize)
|
||||
for op in ops {
|
||||
match &op {
|
||||
IrOp::Call(id) => {
|
||||
if let Some(body) = bodies.get(id) {
|
||||
if body.len() <= max_size && !contains_call_to(body, *id) {
|
||||
if let Some(body) = bodies.get(id)
|
||||
&& body.len() <= max_size && !contains_call_to(body, *id) {
|
||||
// Inline the body, converting TailCall back to Call
|
||||
// (tail position in the callee is not tail position in the caller)
|
||||
for inlined_op in body {
|
||||
@@ -426,11 +426,12 @@ fn inline(ops: Vec<IrOp>, bodies: &HashMap<WordId, Vec<IrOp>>, max_size: usize)
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
out.push(op);
|
||||
}
|
||||
_ => {
|
||||
out.push(apply_to_bodies(op, &|inner| inline(inner, bodies, max_size)));
|
||||
out.push(apply_to_bodies(op, &|inner| {
|
||||
inline(inner, bodies, max_size)
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -449,15 +450,12 @@ fn contains_call_to(ops: &[IrOp], target: WordId) -> bool {
|
||||
if contains_call_to(then_body, target) {
|
||||
return true;
|
||||
}
|
||||
if let Some(eb) = else_body {
|
||||
if contains_call_to(eb, target) {
|
||||
if let Some(eb) = else_body
|
||||
&& contains_call_to(eb, target) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
IrOp::DoLoop { body, .. }
|
||||
| IrOp::BeginUntil { body }
|
||||
| IrOp::BeginAgain { body } => {
|
||||
IrOp::DoLoop { body, .. } | IrOp::BeginUntil { body } | IrOp::BeginAgain { body } => {
|
||||
if contains_call_to(body, target) {
|
||||
return true;
|
||||
}
|
||||
@@ -481,12 +479,11 @@ fn contains_call_to(ops: &[IrOp], target: WordId) -> bool {
|
||||
{
|
||||
return true;
|
||||
}
|
||||
if let Some(eb) = else_body {
|
||||
if contains_call_to(eb, target) {
|
||||
if let Some(eb) = else_body
|
||||
&& contains_call_to(eb, target) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
@@ -567,10 +564,7 @@ mod tests {
|
||||
optimize(ops, &config, &HashMap::new())
|
||||
}
|
||||
|
||||
fn opt_with_inline(
|
||||
ops: Vec<IrOp>,
|
||||
bodies: &HashMap<WordId, Vec<IrOp>>,
|
||||
) -> Vec<IrOp> {
|
||||
fn opt_with_inline(ops: Vec<IrOp>, bodies: &HashMap<WordId, Vec<IrOp>>) -> Vec<IrOp> {
|
||||
let config = OptConfig {
|
||||
peephole: true,
|
||||
constant_fold: true,
|
||||
@@ -754,10 +748,7 @@ mod tests {
|
||||
let mut bodies = HashMap::new();
|
||||
// SQUARE = DUP *
|
||||
bodies.insert(WordId(5), vec![IrOp::Dup, IrOp::Mul]);
|
||||
let result = opt_with_inline(
|
||||
vec![IrOp::PushI32(7), IrOp::Call(WordId(5))],
|
||||
&bodies,
|
||||
);
|
||||
let result = opt_with_inline(vec![IrOp::PushI32(7), IrOp::Call(WordId(5))], &bodies);
|
||||
// After inlining: 7 DUP * (Dup isn't folded by constant folder)
|
||||
assert_eq!(result, vec![IrOp::PushI32(7), IrOp::Dup, IrOp::Mul]);
|
||||
}
|
||||
@@ -767,10 +758,7 @@ mod tests {
|
||||
let mut bodies = HashMap::new();
|
||||
// ADD3 = 3 +
|
||||
bodies.insert(WordId(5), vec![IrOp::PushI32(3), IrOp::Add]);
|
||||
let result = opt_with_inline(
|
||||
vec![IrOp::PushI32(5), IrOp::Call(WordId(5))],
|
||||
&bodies,
|
||||
);
|
||||
let result = opt_with_inline(vec![IrOp::PushI32(5), IrOp::Call(WordId(5))], &bodies);
|
||||
// After inlining: PushI32(5) PushI32(3) Add => folded to PushI32(8)
|
||||
assert_eq!(result, vec![IrOp::PushI32(8)]);
|
||||
}
|
||||
@@ -781,7 +769,10 @@ mod tests {
|
||||
bodies.insert(WordId(5), vec![IrOp::Dup, IrOp::Call(WordId(5))]);
|
||||
let result = opt_with_inline(vec![IrOp::Call(WordId(5))], &bodies);
|
||||
// Should NOT inline (recursive), but tail call detect may convert
|
||||
assert!(matches!(result.last(), Some(IrOp::Call(WordId(5))) | Some(IrOp::TailCall(WordId(5)))));
|
||||
assert!(matches!(
|
||||
result.last(),
|
||||
Some(IrOp::Call(WordId(5))) | Some(IrOp::TailCall(WordId(5)))
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
+151
-1
@@ -16,7 +16,7 @@ use wasmtime::{
|
||||
Table, Val, ValType,
|
||||
};
|
||||
|
||||
use crate::codegen::{CodegenConfig, CompiledModule, compile_word};
|
||||
use crate::codegen::{CodegenConfig, CompiledModule, compile_consolidated_module, compile_word};
|
||||
use crate::dictionary::{Dictionary, WordId};
|
||||
use crate::ir::IrOp;
|
||||
use crate::memory::{
|
||||
@@ -640,6 +640,7 @@ impl ForthVM {
|
||||
"FVARIABLE" => return self.define_fvariable(),
|
||||
"FCONSTANT" => return self.define_fconstant(),
|
||||
"FVALUE" => return self.define_fvalue(),
|
||||
"CONSOLIDATE" => return self.consolidate(),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
@@ -1488,6 +1489,70 @@ impl ForthVM {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Consolidation
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
/// Recompile all IR-based words into a single WASM module with direct calls.
|
||||
///
|
||||
/// After consolidation, `call_indirect` between IR-based words is replaced
|
||||
/// with direct `call` instructions, enabling Cranelift to optimize across
|
||||
/// word boundaries. Host functions are unaffected and still use indirect
|
||||
/// calls.
|
||||
fn consolidate(&mut self) -> anyhow::Result<()> {
|
||||
// Collect all words with IR bodies
|
||||
let mut words: Vec<(WordId, Vec<IrOp>)> = self
|
||||
.ir_bodies
|
||||
.iter()
|
||||
.map(|(&id, body)| (id, body.clone()))
|
||||
.collect();
|
||||
words.sort_by_key(|(id, _)| id.0);
|
||||
|
||||
if words.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Build local function map: WordId -> module-internal function index.
|
||||
// Imported functions: emit (idx 0). Defined functions start at idx 1.
|
||||
let mut local_fn_map = HashMap::new();
|
||||
for (i, (word_id, _)) in words.iter().enumerate() {
|
||||
local_fn_map.insert(*word_id, (i as u32) + 1);
|
||||
}
|
||||
|
||||
let table_size = self.table_size();
|
||||
|
||||
// Compile the consolidated module
|
||||
let module_bytes = compile_consolidated_module(&words, &local_fn_map, table_size)
|
||||
.map_err(|e| anyhow::anyhow!("consolidation codegen error: {e}"))?;
|
||||
|
||||
// Instantiate
|
||||
let module = Module::new(&self.engine, &module_bytes)?;
|
||||
let instance = Instance::new(
|
||||
&mut self.store,
|
||||
&module,
|
||||
&[
|
||||
self.emit_func.into(),
|
||||
self.memory.into(),
|
||||
self.dsp.into(),
|
||||
self.rsp.into(),
|
||||
self.fsp.into(),
|
||||
self.table.into(),
|
||||
],
|
||||
)?;
|
||||
|
||||
// Update function table with new exports
|
||||
for (i, (word_id, _)) in words.iter().enumerate() {
|
||||
let export_name = format!("fn_{i}");
|
||||
let func = instance
|
||||
.get_func(&mut self.store, &export_name)
|
||||
.ok_or_else(|| anyhow::anyhow!("missing export {export_name}"))?;
|
||||
self.table
|
||||
.set(&mut self.store, word_id.0 as u64, Ref::Func(Some(func)))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// WASM instantiation
|
||||
// -----------------------------------------------------------------------
|
||||
@@ -10111,4 +10176,89 @@ mod tests {
|
||||
assert_eq!(eval_stack(": T2 0 IF 42 ELSE 0 THEN ; T2"), vec![0]);
|
||||
assert_eq!(eval_stack(": SUM 0 SWAP 0 DO I + LOOP ; 10 SUM"), vec![45]);
|
||||
}
|
||||
|
||||
// -- CONSOLIDATE tests --
|
||||
|
||||
#[test]
fn consolidate_basic() {
    // B calls A; the result must be unchanged after consolidation.
    let stack = eval_stack(": A 1 ; : B A 2 + ; CONSOLIDATE B");
    assert_eq!(stack, vec![3]);
}

#[test]
fn consolidate_preserves_host_functions() {
    // EMIT is a host function and must keep working from consolidated code.
    let output = eval_output(": HELLO 72 EMIT 73 EMIT ; CONSOLIDATE HELLO");
    assert_eq!(output, "HI");
}

#[test]
fn consolidate_no_op_when_empty() {
    // CONSOLIDATE with no user words should not error
    let stack = eval("CONSOLIDATE 42").0;
    assert_eq!(stack, vec![42]);
}

#[test]
fn consolidate_multiple_words() {
    let stack = eval_stack(": X 10 ; : Y 20 ; : Z X Y + ; CONSOLIDATE Z");
    assert_eq!(stack, vec![30]);
}

#[test]
fn consolidate_with_control_flow() {
    let stack = eval_stack(": ABS2 DUP 0< IF NEGATE THEN ; CONSOLIDATE -5 ABS2");
    assert_eq!(stack, vec![5]);
}

#[test]
fn consolidate_with_loop() {
    // 0 + 1 + 2 + 3 + 4
    let stack = eval_stack(": SUM2 0 SWAP 0 DO I + LOOP ; CONSOLIDATE 5 SUM2");
    assert_eq!(stack, vec![10]);
}

#[test]
fn consolidate_preserves_variables() {
    let stack = eval_stack("VARIABLE V 42 V ! : RV V @ ; CONSOLIDATE RV");
    assert_eq!(stack, vec![42]);
}

#[test]
fn consolidate_nested_calls() {
    // A calls B which calls C -- all should use direct calls after consolidation
    let stack = eval_stack(": C 1 ; : B C C + ; : A B B + ; CONSOLIDATE A");
    assert_eq!(stack, vec![4]);
}

#[test]
fn consolidate_words_still_work_individually() {
    // Words invoked from the interpreter after consolidation.
    let stack = eval_stack(": P 3 ; : Q 4 ; CONSOLIDATE P Q +");
    assert_eq!(stack, vec![7]);
}

#[test]
fn consolidate_with_begin_until() {
    // Countdown: start at 5, subtract 1 until 0
    let stack = eval_stack(": CD BEGIN 1- DUP 0= UNTIL ; CONSOLIDATE 5 CD");
    assert_eq!(stack, vec![0]);
}

#[test]
fn consolidate_with_begin_while_repeat() {
    let stack = eval_stack(": CW BEGIN DUP WHILE 1- REPEAT ; CONSOLIDATE 3 CW");
    assert_eq!(stack, vec![0]);
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user