Implement startup batching: 12x faster boot
Batch-compile all ~64 IR primitives into a single WASM module at startup, replacing ~64 separate Module::new + Instance::new calls with one of each. Reuses compile_consolidated_module() directly and removes the compile_core_module() stub. Boot time: 7.7ms -> 0.6ms (release); test suite: 5.1s -> 1.5s (debug). 13 of 14 optimizations are now implemented; 392 tests passing.
This commit is contained in:
@@ -1877,16 +1877,6 @@ pub fn compile_consolidated_module(
|
|||||||
Ok(bytes)
|
Ok(bytes)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Generate the core/bootstrap WASM module.
|
|
||||||
///
|
|
||||||
/// Not yet implemented -- will be built in a future step.
|
|
||||||
pub fn compile_core_module(primitives: &[(String, Vec<IrOp>)]) -> WaferResult<Vec<u8>> {
|
|
||||||
let _ = primitives;
|
|
||||||
Err(WaferError::CodegenError(
|
|
||||||
"compile_core_module not yet implemented".to_string(),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Tests
|
// Tests
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -237,6 +237,10 @@ pub struct ForthVM {
|
|||||||
config: WaferConfig,
|
config: WaferConfig,
|
||||||
/// Total WASM module bytes compiled.
|
/// Total WASM module bytes compiled.
|
||||||
total_module_bytes: u64,
|
total_module_bytes: u64,
|
||||||
|
/// When true, `register_primitive` defers WASM compilation for batch processing.
|
||||||
|
batch_mode: bool,
|
||||||
|
/// IR primitives deferred during `batch_mode` for single-module compilation.
|
||||||
|
deferred_ir: Vec<(WordId, Vec<IrOp>)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ForthVM {
|
impl ForthVM {
|
||||||
@@ -360,6 +364,8 @@ impl ForthVM {
|
|||||||
ir_bodies: HashMap::new(),
|
ir_bodies: HashMap::new(),
|
||||||
config: wafer_config,
|
config: wafer_config,
|
||||||
total_module_bytes: 0,
|
total_module_bytes: 0,
|
||||||
|
batch_mode: false,
|
||||||
|
deferred_ir: Vec::new(),
|
||||||
};
|
};
|
||||||
|
|
||||||
vm.register_primitives()?;
|
vm.register_primitives()?;
|
||||||
@@ -1563,6 +1569,50 @@ impl ForthVM {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Batch-compile all deferred IR primitives into a single WASM module.
|
||||||
|
fn batch_compile_deferred(&mut self) -> anyhow::Result<()> {
|
||||||
|
let words = std::mem::take(&mut self.deferred_ir);
|
||||||
|
if words.is_empty() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut local_fn_map = HashMap::new();
|
||||||
|
for (i, (word_id, _)) in words.iter().enumerate() {
|
||||||
|
local_fn_map.insert(*word_id, (i as u32) + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
self.ensure_table_size(self.next_table_index)?;
|
||||||
|
let table_size = self.table_size();
|
||||||
|
|
||||||
|
let module_bytes = compile_consolidated_module(&words, &local_fn_map, table_size)
|
||||||
|
.map_err(|e| anyhow::anyhow!("batch compile error: {e}"))?;
|
||||||
|
|
||||||
|
self.total_module_bytes += module_bytes.len() as u64;
|
||||||
|
let module = Module::new(&self.engine, &module_bytes)?;
|
||||||
|
let instance = Instance::new(
|
||||||
|
&mut self.store,
|
||||||
|
&module,
|
||||||
|
&[
|
||||||
|
self.emit_func.into(),
|
||||||
|
self.memory.into(),
|
||||||
|
self.dsp.into(),
|
||||||
|
self.rsp.into(),
|
||||||
|
self.fsp.into(),
|
||||||
|
self.table.into(),
|
||||||
|
],
|
||||||
|
)?;
|
||||||
|
|
||||||
|
for (i, (word_id, _)) in words.iter().enumerate() {
|
||||||
|
let func = instance
|
||||||
|
.get_func(&mut self.store, &format!("fn_{i}"))
|
||||||
|
.ok_or_else(|| anyhow::anyhow!("missing batch export fn_{i}"))?;
|
||||||
|
self.table
|
||||||
|
.set(&mut self.store, word_id.0 as u64, Ref::Func(Some(func)))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
// -----------------------------------------------------------------------
|
// -----------------------------------------------------------------------
|
||||||
// WASM instantiation
|
// WASM instantiation
|
||||||
// -----------------------------------------------------------------------
|
// -----------------------------------------------------------------------
|
||||||
@@ -1860,7 +1910,14 @@ impl ForthVM {
|
|||||||
.create(name, immediate)
|
.create(name, immediate)
|
||||||
.map_err(|e| anyhow::anyhow!("{e}"))?;
|
.map_err(|e| anyhow::anyhow!("{e}"))?;
|
||||||
self.ir_bodies.insert(word_id, ir_body.clone());
|
self.ir_bodies.insert(word_id, ir_body.clone());
|
||||||
|
self.dictionary.reveal();
|
||||||
|
self.sync_word_lookup(name, word_id, immediate);
|
||||||
|
self.next_table_index = self.next_table_index.max(word_id.0 + 1);
|
||||||
|
|
||||||
|
if self.batch_mode {
|
||||||
|
// Defer WASM compilation for batch processing
|
||||||
|
self.deferred_ir.push((word_id, ir_body));
|
||||||
|
} else {
|
||||||
let config = CodegenConfig {
|
let config = CodegenConfig {
|
||||||
base_fn_index: word_id.0,
|
base_fn_index: word_id.0,
|
||||||
table_size: self.table_size(),
|
table_size: self.table_size(),
|
||||||
@@ -1868,11 +1925,8 @@ impl ForthVM {
|
|||||||
};
|
};
|
||||||
let compiled = compile_word(name, &ir_body, &config)
|
let compiled = compile_word(name, &ir_body, &config)
|
||||||
.map_err(|e| anyhow::anyhow!("codegen error for {name}: {e}"))?;
|
.map_err(|e| anyhow::anyhow!("codegen error for {name}: {e}"))?;
|
||||||
|
|
||||||
self.instantiate_and_install(&compiled, word_id)?;
|
self.instantiate_and_install(&compiled, word_id)?;
|
||||||
self.dictionary.reveal();
|
}
|
||||||
self.sync_word_lookup(name, word_id, immediate);
|
|
||||||
self.next_table_index = self.next_table_index.max(word_id.0 + 1);
|
|
||||||
|
|
||||||
Ok(word_id)
|
Ok(word_id)
|
||||||
}
|
}
|
||||||
@@ -1901,6 +1955,8 @@ impl ForthVM {
|
|||||||
|
|
||||||
/// Register all built-in primitive words.
|
/// Register all built-in primitive words.
|
||||||
fn register_primitives(&mut self) -> anyhow::Result<()> {
|
fn register_primitives(&mut self) -> anyhow::Result<()> {
|
||||||
|
self.batch_mode = true;
|
||||||
|
|
||||||
// -- Stack manipulation --
|
// -- Stack manipulation --
|
||||||
self.register_primitive("DUP", false, vec![IrOp::Dup])?;
|
self.register_primitive("DUP", false, vec![IrOp::Dup])?;
|
||||||
self.register_primitive("DROP", false, vec![IrOp::Drop])?;
|
self.register_primitive("DROP", false, vec![IrOp::Drop])?;
|
||||||
@@ -2187,6 +2243,10 @@ impl ForthVM {
|
|||||||
// -- Floating-Point word set --
|
// -- Floating-Point word set --
|
||||||
self.register_float_words()?;
|
self.register_float_words()?;
|
||||||
|
|
||||||
|
// Batch-compile all deferred IR primitives into a single WASM module
|
||||||
|
self.batch_mode = false;
|
||||||
|
self.batch_compile_deferred()?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ This document describes every optimization that makes sense for WAFER, why it ma
|
|||||||
| 10 | Codegen Improvements | Codegen | Done | Medium |
|
| 10 | Codegen Improvements | Codegen | Done | Medium |
|
||||||
| 11 | wasmtime Configuration | Runtime | Done | Low |
|
| 11 | wasmtime Configuration | Runtime | Done | Low |
|
||||||
| 12 | Dictionary Hash Index | Runtime | Done | Low |
|
| 12 | Dictionary Hash Index | Runtime | Done | Low |
|
||||||
| 13 | Startup Batching | Architecture | Not started | Low |
|
| 13 | Startup Batching | Architecture | Done | Low |
|
||||||
| 14 | Float / Double-Cell | Codegen | Not started | Future |
|
| 14 | Float / Double-Cell | Codegen | Not started | Future |
|
||||||
|
|
||||||
## 1. Stack-to-Local Promotion
|
## 1. Stack-to-Local Promotion
|
||||||
@@ -422,7 +422,7 @@ This affects **compile time** (word lookup during parsing), not runtime (compile
|
|||||||
|
|
||||||
## 13. Startup Batching
|
## 13. Startup Batching
|
||||||
|
|
||||||
**Status: Not started.** `compile_core_module()` stub exists in `codegen.rs`.
|
**Status: Done.** All IR primitives batch-compiled into a single WASM module at boot via `compile_consolidated_module()`. Reduces boot from ~7.7ms to ~0.6ms (12x faster). The `compile_core_module()` stub has been removed.
|
||||||
|
|
||||||
Currently, each of the 80+ primitives registered at boot creates a separate WASM module: `wasm-encoder` builds it, `wasmparser` validates it, Cranelift compiles it, and wasmtime instantiates it. This happens 80+ times sequentially.
|
Currently, each of the 80+ primitives registered at boot creates a separate WASM module: `wasm-encoder` builds it, `wasmparser` validates it, Cranelift compiles it, and wasmtime instantiates it. This happens 80+ times sequentially.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user