Implement optimization pipeline: peephole, constant folding, strength reduction, DCE, tail calls
IR optimizer with 6 composable passes:
- Peephole: PushI32+Drop, Dup+Drop, Swap+Swap, Swap+Drop→Nip, identity ops
- Constant folding: binary (Add/Sub/Mul/And/Or/Xor/shifts/comparisons) + unary (Negate/Abs/Invert/ZeroEq/ZeroLt)
- Strength reduction: power-of-2 multiply→shift, PushI32(0)+Eq→ZeroEq
- Dead code elimination: truncate after Exit, constant-conditional If
- Tail call detection: last Call→TailCall when return stack balanced
- Compound ops: Over+Over→TwoDup, Drop+Drop→TwoDrop with optimized codegen

Dictionary hash index for O(1) word lookup during compilation.
wasmtime config: disable NaN canonicalization, enable module caching.
319 unit tests + 11 compliance, all passing.
This commit is contained in:
@@ -260,6 +260,33 @@ fn emit_op(f: &mut Function, op: &IrOp) {
|
|||||||
push_via_local(f, 2);
|
push_via_local(f, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
IrOp::TwoDup => {
|
||||||
|
// ( a b -- a b a b ) : read top two cells, push copies
|
||||||
|
// Read b (at dsp) into local 0
|
||||||
|
f.instruction(&Instruction::GlobalGet(DSP))
|
||||||
|
.instruction(&Instruction::I32Load(MEM4))
|
||||||
|
.instruction(&Instruction::LocalSet(0));
|
||||||
|
// Read a (at dsp + 4) into local 1
|
||||||
|
f.instruction(&Instruction::GlobalGet(DSP))
|
||||||
|
.instruction(&Instruction::I32Const(CELL_SIZE as i32))
|
||||||
|
.instruction(&Instruction::I32Add)
|
||||||
|
.instruction(&Instruction::I32Load(MEM4))
|
||||||
|
.instruction(&Instruction::LocalSet(1));
|
||||||
|
// Push a then b
|
||||||
|
f.instruction(&Instruction::LocalGet(1));
|
||||||
|
push_via_local(f, 2);
|
||||||
|
f.instruction(&Instruction::LocalGet(0));
|
||||||
|
push_via_local(f, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
IrOp::TwoDrop => {
|
||||||
|
// ( a b -- ) : increment dsp by 2 cells
|
||||||
|
f.instruction(&Instruction::GlobalGet(DSP))
|
||||||
|
.instruction(&Instruction::I32Const(CELL_SIZE as i32 * 2))
|
||||||
|
.instruction(&Instruction::I32Add)
|
||||||
|
.instruction(&Instruction::GlobalSet(DSP));
|
||||||
|
}
|
||||||
|
|
||||||
// -- Arithmetic -----------------------------------------------------
|
// -- Arithmetic -----------------------------------------------------
|
||||||
IrOp::Add => emit_binary_commutative(f, &Instruction::I32Add),
|
IrOp::Add => emit_binary_commutative(f, &Instruction::I32Add),
|
||||||
IrOp::Mul => emit_binary_commutative(f, &Instruction::I32Mul),
|
IrOp::Mul => emit_binary_commutative(f, &Instruction::I32Mul),
|
||||||
|
|||||||
@@ -7,6 +7,8 @@
|
|||||||
//! - Code field: function table index (4 bytes)
|
//! - Code field: function table index (4 bytes)
|
||||||
//! - Parameter field: data for `CREATEd` words, DOES> action, etc.
|
//! - Parameter field: data for `CREATEd` words, DOES> action, etc.
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use crate::error::{WaferError, WaferResult};
|
use crate::error::{WaferError, WaferResult};
|
||||||
use crate::memory::{DICTIONARY_BASE, INITIAL_PAGES, PAGE_SIZE};
|
use crate::memory::{DICTIONARY_BASE, INITIAL_PAGES, PAGE_SIZE};
|
||||||
|
|
||||||
@@ -36,6 +38,8 @@ pub struct Dictionary {
|
|||||||
here: u32,
|
here: u32,
|
||||||
/// Next available function table index.
|
/// Next available function table index.
|
||||||
next_fn_index: u32,
|
next_fn_index: u32,
|
||||||
|
/// Hash index for O(1) word lookup: name -> (`word_addr`, `fn_index`, `is_immediate`).
|
||||||
|
index: HashMap<String, (u32, u32, bool)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Align an address upward to a 4-byte boundary.
|
/// Align an address upward to a 4-byte boundary.
|
||||||
@@ -53,6 +57,7 @@ impl Dictionary {
|
|||||||
latest: 0,
|
latest: 0,
|
||||||
here: DICTIONARY_BASE,
|
here: DICTIONARY_BASE,
|
||||||
next_fn_index: 0,
|
next_fn_index: 0,
|
||||||
|
index: HashMap::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -127,6 +132,7 @@ impl Dictionary {
|
|||||||
let flags_addr = (self.latest + 4) as usize;
|
let flags_addr = (self.latest + 4) as usize;
|
||||||
if flags_addr < self.memory.len() {
|
if flags_addr < self.memory.len() {
|
||||||
self.memory[flags_addr] &= !flags::HIDDEN;
|
self.memory[flags_addr] &= !flags::HIDDEN;
|
||||||
|
self.update_index(self.latest);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -135,6 +141,7 @@ impl Dictionary {
|
|||||||
let flags_addr = (word_addr + 4) as usize;
|
let flags_addr = (word_addr + 4) as usize;
|
||||||
if flags_addr < self.memory.len() {
|
if flags_addr < self.memory.len() {
|
||||||
self.memory[flags_addr] &= !flags::HIDDEN;
|
self.memory[flags_addr] &= !flags::HIDDEN;
|
||||||
|
self.update_index(word_addr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -142,14 +149,26 @@ impl Dictionary {
|
|||||||
pub fn set_code_field(&mut self, word_addr: u32, fn_index: u32) {
|
pub fn set_code_field(&mut self, word_addr: u32, fn_index: u32) {
|
||||||
if let Ok(code_addr) = self.code_field_addr(word_addr) {
|
if let Ok(code_addr) = self.code_field_addr(word_addr) {
|
||||||
self.write_u32_unchecked(code_addr, fn_index);
|
self.write_u32_unchecked(code_addr, fn_index);
|
||||||
|
// Update the index if the word is visible
|
||||||
|
let flags_addr = (word_addr + 4) as usize;
|
||||||
|
if flags_addr < self.memory.len() && self.memory[flags_addr] & flags::HIDDEN == 0 {
|
||||||
|
self.update_index(word_addr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Look up a word by name. Returns (`word_address`, `word_id`, `is_immediate`).
|
/// Look up a word by name. Returns (`word_address`, `word_id`, `is_immediate`).
|
||||||
/// Searches from LATEST backward through the linked list.
|
/// Uses the hash index for O(1) lookup, with linked-list fallback.
|
||||||
/// Skips HIDDEN words.
|
/// Skips HIDDEN words.
|
||||||
pub fn find(&self, name: &str) -> Option<(u32, WordId, bool)> {
|
pub fn find(&self, name: &str) -> Option<(u32, WordId, bool)> {
|
||||||
let search_name = name.to_ascii_uppercase();
|
let search_name = name.to_ascii_uppercase();
|
||||||
|
|
||||||
|
// Fast path: hash index lookup
|
||||||
|
if let Some(&(word_addr, fn_index, is_immediate)) = self.index.get(&search_name) {
|
||||||
|
return Some((word_addr, WordId(fn_index), is_immediate));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: linked-list walk (for words not yet in the index)
|
||||||
let search_bytes = search_name.as_bytes();
|
let search_bytes = search_name.as_bytes();
|
||||||
let search_len = search_bytes.len();
|
let search_len = search_bytes.len();
|
||||||
|
|
||||||
@@ -326,6 +345,10 @@ impl Dictionary {
|
|||||||
return Err(WaferError::InvalidAddress(self.latest + 4));
|
return Err(WaferError::InvalidAddress(self.latest + 4));
|
||||||
}
|
}
|
||||||
self.memory[flags_addr] ^= flags::IMMEDIATE;
|
self.memory[flags_addr] ^= flags::IMMEDIATE;
|
||||||
|
// Update the index if the word is visible (not hidden)
|
||||||
|
if self.memory[flags_addr] & flags::HIDDEN == 0 {
|
||||||
|
self.update_index(self.latest);
|
||||||
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -341,6 +364,27 @@ impl Dictionary {
|
|||||||
|
|
||||||
// -- Private helpers --
|
// -- Private helpers --
|
||||||
|
|
||||||
|
/// Insert or update the hash index entry for the word at `word_addr`.
|
||||||
|
/// Reads the name, `fn_index`, and immediate flag from the memory buffer.
|
||||||
|
fn update_index(&mut self, word_addr: u32) {
|
||||||
|
let flags_addr = (word_addr + 4) as usize;
|
||||||
|
if flags_addr >= self.memory.len() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let flags_byte = self.memory[flags_addr];
|
||||||
|
let name_len = (flags_byte & flags::LENGTH_MASK) as usize;
|
||||||
|
let name_start = (word_addr + 5) as usize;
|
||||||
|
let name_end = name_start + name_len;
|
||||||
|
if name_end > self.memory.len() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let name = String::from_utf8_lossy(&self.memory[name_start..name_end]).to_string();
|
||||||
|
let is_immediate = flags_byte & flags::IMMEDIATE != 0;
|
||||||
|
let code_addr = align4(word_addr + 5 + name_len as u32);
|
||||||
|
let fn_index = self.read_u32_unchecked(code_addr);
|
||||||
|
self.index.insert(name, (word_addr, fn_index, is_immediate));
|
||||||
|
}
|
||||||
|
|
||||||
/// Compute the address of the code field for the word at `word_addr`.
|
/// Compute the address of the code field for the word at `word_addr`.
|
||||||
fn code_field_addr(&self, word_addr: u32) -> WaferResult<u32> {
|
fn code_field_addr(&self, word_addr: u32) -> WaferResult<u32> {
|
||||||
let flags_addr = (word_addr + 4) as usize;
|
let flags_addr = (word_addr + 4) as usize;
|
||||||
|
|||||||
@@ -24,6 +24,10 @@ pub enum IrOp {
|
|||||||
Rot,
|
Rot,
|
||||||
Nip,
|
Nip,
|
||||||
Tuck,
|
Tuck,
|
||||||
|
/// Two-item duplication: ( a b -- a b a b )
|
||||||
|
TwoDup,
|
||||||
|
/// Two-item drop: ( a b -- )
|
||||||
|
TwoDrop,
|
||||||
|
|
||||||
// -- Arithmetic --
|
// -- Arithmetic --
|
||||||
Add,
|
Add,
|
||||||
|
|||||||
@@ -1,19 +1,618 @@
|
|||||||
//! Optimization passes for WAFER's IR.
|
//! Optimization passes for WAFER's IR.
|
||||||
//!
|
//!
|
||||||
//! Each pass is a function `Vec<IrOp> -> Vec<IrOp>`, composable in sequence:
|
//! Each pass is a function `Vec<IrOp> -> Vec<IrOp>`, composable in sequence:
|
||||||
//! 1. Constant folding
|
//! 1. Peephole optimization
|
||||||
//! 2. Strength reduction
|
//! 2. Constant folding
|
||||||
//! 3. Peephole optimization
|
//! 3. Strength reduction
|
||||||
//! 4. Inlining
|
//! 4. Dead code elimination
|
||||||
//! 5. Dead code elimination
|
//! 5. Tail call detection
|
||||||
//! 6. Stack-to-local promotion
|
|
||||||
|
|
||||||
// TODO: Step 11 - Optimization pass implementations
|
use crate::ir::IrOp;
|
||||||
|
|
||||||
|
/// Switches selecting which passes the optimization pipeline runs.
///
/// Every flag is `false` in `OptConfig::default()`, so the derived default
/// disables the whole pipeline; callers enable passes individually.
#[derive(Debug, Clone, Default)]
pub struct OptConfig {
    /// Run the peephole pass (rewrites of adjacent op pairs).
    pub peephole: bool,
    /// Run constant folding.
    pub constant_fold: bool,
    /// Run tail call detection.
    pub tail_call: bool,
    /// Run strength reduction (e.g. multiply by a power of 2 becomes a shift).
    pub strength_reduce: bool,
    /// Run dead code elimination.
    pub dce: bool,
}
|
||||||
|
|
||||||
|
/// Run all enabled optimization passes.
|
||||||
|
pub fn optimize(ops: Vec<IrOp>, config: &OptConfig) -> Vec<IrOp> {
|
||||||
|
let mut ir = ops;
|
||||||
|
|
||||||
|
// Phase 1: simplify
|
||||||
|
if config.peephole {
|
||||||
|
ir = peephole(ir);
|
||||||
|
}
|
||||||
|
if config.constant_fold {
|
||||||
|
ir = constant_fold(ir);
|
||||||
|
}
|
||||||
|
if config.strength_reduce {
|
||||||
|
ir = strength_reduce(ir);
|
||||||
|
}
|
||||||
|
if config.peephole {
|
||||||
|
ir = peephole(ir);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Phase 2: eliminate dead code
|
||||||
|
if config.dce {
|
||||||
|
ir = dce(ir);
|
||||||
|
}
|
||||||
|
if config.peephole {
|
||||||
|
ir = peephole(ir);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Phase 3: tail calls (must be last)
|
||||||
|
if config.tail_call {
|
||||||
|
ir = tail_call_detect(ir);
|
||||||
|
}
|
||||||
|
|
||||||
|
ir
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Helper: recurse into control-flow bodies
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Apply a pass function to all nested bodies within a control-flow IR op.
|
||||||
|
fn apply_to_bodies<F: Fn(Vec<IrOp>) -> Vec<IrOp>>(op: IrOp, pass: &F) -> IrOp {
|
||||||
|
match op {
|
||||||
|
IrOp::If {
|
||||||
|
then_body,
|
||||||
|
else_body,
|
||||||
|
} => IrOp::If {
|
||||||
|
then_body: pass(then_body),
|
||||||
|
else_body: else_body.map(pass),
|
||||||
|
},
|
||||||
|
IrOp::DoLoop { body, is_plus_loop } => IrOp::DoLoop {
|
||||||
|
body: pass(body),
|
||||||
|
is_plus_loop,
|
||||||
|
},
|
||||||
|
IrOp::BeginUntil { body } => IrOp::BeginUntil { body: pass(body) },
|
||||||
|
IrOp::BeginAgain { body } => IrOp::BeginAgain { body: pass(body) },
|
||||||
|
IrOp::BeginWhileRepeat { test, body } => IrOp::BeginWhileRepeat {
|
||||||
|
test: pass(test),
|
||||||
|
body: pass(body),
|
||||||
|
},
|
||||||
|
IrOp::BeginDoubleWhileRepeat {
|
||||||
|
outer_test,
|
||||||
|
inner_test,
|
||||||
|
body,
|
||||||
|
after_repeat,
|
||||||
|
else_body,
|
||||||
|
} => IrOp::BeginDoubleWhileRepeat {
|
||||||
|
outer_test: pass(outer_test),
|
||||||
|
inner_test: pass(inner_test),
|
||||||
|
body: pass(body),
|
||||||
|
after_repeat: pass(after_repeat),
|
||||||
|
else_body: else_body.map(pass),
|
||||||
|
},
|
||||||
|
other => other,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Pass 1: Peephole optimization
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Peephole optimizer: pattern-match adjacent ops and simplify.
|
||||||
|
fn peephole(ops: Vec<IrOp>) -> Vec<IrOp> {
|
||||||
|
let mut ir = ops;
|
||||||
|
loop {
|
||||||
|
let before_len = ir.len();
|
||||||
|
ir = peephole_one_pass(ir);
|
||||||
|
if ir.len() == before_len {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ir
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Single peephole pass (one sweep through the IR).
|
||||||
|
fn peephole_one_pass(ops: Vec<IrOp>) -> Vec<IrOp> {
|
||||||
|
let mut out: Vec<IrOp> = Vec::with_capacity(ops.len());
|
||||||
|
|
||||||
|
for op in ops {
|
||||||
|
// Recurse into control-flow bodies first
|
||||||
|
let op = apply_to_bodies(op, &peephole);
|
||||||
|
|
||||||
|
// Try to match the new op against the last item in output
|
||||||
|
if let Some(prev) = out.last() {
|
||||||
|
match (&prev, &op) {
|
||||||
|
// PushI32(n), Drop => remove both
|
||||||
|
(IrOp::PushI32(_), IrOp::Drop) => {
|
||||||
|
out.pop();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Dup, Drop => remove both
|
||||||
|
(IrOp::Dup, IrOp::Drop) => {
|
||||||
|
out.pop();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Swap, Swap => remove both
|
||||||
|
(IrOp::Swap, IrOp::Swap) => {
|
||||||
|
out.pop();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Swap, Drop => Nip
|
||||||
|
(IrOp::Swap, IrOp::Drop) => {
|
||||||
|
out.pop();
|
||||||
|
out.push(IrOp::Nip);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// PushI32(0), Add => identity, remove both
|
||||||
|
(IrOp::PushI32(0), IrOp::Add) => {
|
||||||
|
out.pop();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// PushI32(0), Or => identity, remove both
|
||||||
|
(IrOp::PushI32(0), IrOp::Or) => {
|
||||||
|
out.pop();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// PushI32(-1), And => identity, remove both
|
||||||
|
(IrOp::PushI32(-1), IrOp::And) => {
|
||||||
|
out.pop();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// PushI32(1), Mul => identity, remove both
|
||||||
|
(IrOp::PushI32(1), IrOp::Mul) => {
|
||||||
|
out.pop();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Over, Over => TwoDup
|
||||||
|
(IrOp::Over, IrOp::Over) => {
|
||||||
|
out.pop();
|
||||||
|
out.push(IrOp::TwoDup);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Drop, Drop => TwoDrop
|
||||||
|
(IrOp::Drop, IrOp::Drop) => {
|
||||||
|
out.pop();
|
||||||
|
out.push(IrOp::TwoDrop);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out.push(op);
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Pass 2: Constant folding
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Constant folder: evaluate operations on known constants at compile time.
|
||||||
|
fn constant_fold(ops: Vec<IrOp>) -> Vec<IrOp> {
|
||||||
|
let mut out: Vec<IrOp> = Vec::with_capacity(ops.len());
|
||||||
|
|
||||||
|
for op in ops {
|
||||||
|
// Recurse into control-flow bodies
|
||||||
|
let op = apply_to_bodies(op, &constant_fold);
|
||||||
|
|
||||||
|
// Try binary fold: last two outputs are PushI32, current op is foldable
|
||||||
|
if out.len() >= 2
|
||||||
|
&& let Some(result) = try_binary_fold(&out[out.len() - 2], &out[out.len() - 1], &op)
|
||||||
|
{
|
||||||
|
out.pop();
|
||||||
|
out.pop();
|
||||||
|
out.push(IrOp::PushI32(result));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try unary fold: last output is PushI32, current op is foldable
|
||||||
|
if !out.is_empty()
|
||||||
|
&& let Some(result) = try_unary_fold(&out[out.len() - 1], &op)
|
||||||
|
{
|
||||||
|
out.pop();
|
||||||
|
out.push(IrOp::PushI32(result));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
out.push(op);
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Try to fold a binary operation on two constants.
|
||||||
|
fn try_binary_fold(a_op: &IrOp, b_op: &IrOp, op: &IrOp) -> Option<i32> {
|
||||||
|
let (a, b) = match (a_op, b_op) {
|
||||||
|
(IrOp::PushI32(a), IrOp::PushI32(b)) => (*a, *b),
|
||||||
|
_ => return None,
|
||||||
|
};
|
||||||
|
|
||||||
|
match op {
|
||||||
|
IrOp::Add => Some(a.wrapping_add(b)),
|
||||||
|
IrOp::Sub => Some(a.wrapping_sub(b)),
|
||||||
|
IrOp::Mul => Some(a.wrapping_mul(b)),
|
||||||
|
IrOp::And => Some(a & b),
|
||||||
|
IrOp::Or => Some(a | b),
|
||||||
|
IrOp::Xor => Some(a ^ b),
|
||||||
|
IrOp::Lshift => {
|
||||||
|
if (0..32).contains(&b) {
|
||||||
|
Some(a.wrapping_shl(b as u32))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
IrOp::Rshift => {
|
||||||
|
if (0..32).contains(&b) {
|
||||||
|
Some((a as u32).wrapping_shr(b as u32) as i32)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
IrOp::ArithRshift => {
|
||||||
|
if (0..32).contains(&b) {
|
||||||
|
Some(a.wrapping_shr(b as u32))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
IrOp::Eq => Some(if a == b { -1 } else { 0 }),
|
||||||
|
IrOp::NotEq => Some(if a != b { -1 } else { 0 }),
|
||||||
|
IrOp::Lt => Some(if a < b { -1 } else { 0 }),
|
||||||
|
IrOp::Gt => Some(if a > b { -1 } else { 0 }),
|
||||||
|
IrOp::LtUnsigned => Some(if (a as u32) < (b as u32) { -1 } else { 0 }),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Try to fold a unary operation on a constant.
|
||||||
|
fn try_unary_fold(n_op: &IrOp, op: &IrOp) -> Option<i32> {
|
||||||
|
let n = match n_op {
|
||||||
|
IrOp::PushI32(n) => *n,
|
||||||
|
_ => return None,
|
||||||
|
};
|
||||||
|
|
||||||
|
match op {
|
||||||
|
IrOp::Negate => Some(n.wrapping_neg()),
|
||||||
|
IrOp::Abs => {
|
||||||
|
if n == i32::MIN {
|
||||||
|
Some(i32::MIN)
|
||||||
|
} else {
|
||||||
|
Some(n.abs())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
IrOp::Invert => Some(!n),
|
||||||
|
IrOp::ZeroEq => Some(if n == 0 { -1 } else { 0 }),
|
||||||
|
IrOp::ZeroLt => Some(if n < 0 { -1 } else { 0 }),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Pass 3: Strength reduction
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Strength reduction: replace expensive ops with cheaper equivalents.
|
||||||
|
fn strength_reduce(ops: Vec<IrOp>) -> Vec<IrOp> {
|
||||||
|
let mut out: Vec<IrOp> = Vec::with_capacity(ops.len());
|
||||||
|
|
||||||
|
for op in ops {
|
||||||
|
// Recurse into control-flow bodies
|
||||||
|
let op = apply_to_bodies(op, &strength_reduce);
|
||||||
|
|
||||||
|
if let Some(prev) = out.last() {
|
||||||
|
match (prev, &op) {
|
||||||
|
// PushI32(n) * where n is power of 2 => shift left
|
||||||
|
(IrOp::PushI32(n), IrOp::Mul) if *n > 0 && (*n as u32).is_power_of_two() => {
|
||||||
|
let shift = (*n as u32).trailing_zeros() as i32;
|
||||||
|
out.pop();
|
||||||
|
out.push(IrOp::PushI32(shift));
|
||||||
|
out.push(IrOp::Lshift);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// PushI32(0) = => ZeroEq
|
||||||
|
(IrOp::PushI32(0), IrOp::Eq) => {
|
||||||
|
out.pop();
|
||||||
|
out.push(IrOp::ZeroEq);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// PushI32(0) < => ZeroLt
|
||||||
|
(IrOp::PushI32(0), IrOp::Lt) => {
|
||||||
|
out.pop();
|
||||||
|
out.push(IrOp::ZeroLt);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out.push(op);
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Pass 4: Dead code elimination
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Dead code elimination: remove unreachable code.
|
||||||
|
fn dce(ops: Vec<IrOp>) -> Vec<IrOp> {
|
||||||
|
let mut out: Vec<IrOp> = Vec::with_capacity(ops.len());
|
||||||
|
|
||||||
|
for op in ops {
|
||||||
|
// Recurse into control-flow bodies
|
||||||
|
let op = apply_to_bodies(op, &dce);
|
||||||
|
|
||||||
|
// Constant conditional: if last output is PushI32 and current is If
|
||||||
|
if let IrOp::If {
|
||||||
|
then_body,
|
||||||
|
else_body,
|
||||||
|
} = &op
|
||||||
|
&& let Some(IrOp::PushI32(n)) = out.last()
|
||||||
|
{
|
||||||
|
let n = *n;
|
||||||
|
out.pop();
|
||||||
|
if n == 0 {
|
||||||
|
// False: emit else_body only
|
||||||
|
if let Some(eb) = else_body {
|
||||||
|
out.extend(eb.iter().cloned());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// True: emit then_body only
|
||||||
|
out.extend(then_body.iter().cloned());
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Truncate after Exit in linear sequence
|
||||||
|
if matches!(op, IrOp::Exit) {
|
||||||
|
out.push(op);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
out.push(op);
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Pass 5: Tail call detection
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Tail call detection: replace the last `Call` with `TailCall` when safe.
|
||||||
|
fn tail_call_detect(ops: Vec<IrOp>) -> Vec<IrOp> {
|
||||||
|
if ops.is_empty() || !is_return_stack_balanced(&ops) {
|
||||||
|
return ops;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut ir = ops;
|
||||||
|
let last_idx = ir.len() - 1;
|
||||||
|
ir[last_idx] = convert_tail_call(ir[last_idx].clone());
|
||||||
|
ir
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if return stack usage is balanced (equal number of `ToR` and `FromR`).
|
||||||
|
fn is_return_stack_balanced(ops: &[IrOp]) -> bool {
|
||||||
|
let mut depth: i32 = 0;
|
||||||
|
for op in ops {
|
||||||
|
match op {
|
||||||
|
IrOp::ToR => depth += 1,
|
||||||
|
IrOp::FromR => depth -= 1,
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
depth == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert a `Call` at tail position to `TailCall`, recursing into `If` branches.
|
||||||
|
fn convert_tail_call(op: IrOp) -> IrOp {
|
||||||
|
match op {
|
||||||
|
IrOp::Call(id) => IrOp::TailCall(id),
|
||||||
|
IrOp::If {
|
||||||
|
mut then_body,
|
||||||
|
else_body,
|
||||||
|
} => {
|
||||||
|
// Recursively check then_body tail
|
||||||
|
if let Some(last) = then_body.pop() {
|
||||||
|
then_body.push(convert_tail_call(last));
|
||||||
|
}
|
||||||
|
// Recursively check else_body tail
|
||||||
|
let else_body = else_body.map(|mut eb| {
|
||||||
|
if let Some(last) = eb.pop() {
|
||||||
|
eb.push(convert_tail_call(last));
|
||||||
|
}
|
||||||
|
eb
|
||||||
|
});
|
||||||
|
IrOp::If {
|
||||||
|
then_body,
|
||||||
|
else_body,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
other => other,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use crate::dictionary::WordId;
|
||||||
|
|
||||||
|
fn opt(ops: Vec<IrOp>) -> Vec<IrOp> {
|
||||||
|
let config = OptConfig {
|
||||||
|
peephole: true,
|
||||||
|
constant_fold: true,
|
||||||
|
tail_call: true,
|
||||||
|
strength_reduce: true,
|
||||||
|
dce: true,
|
||||||
|
};
|
||||||
|
optimize(ops, &config)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Peephole tests
|
||||||
#[test]
|
#[test]
|
||||||
fn placeholder() {
|
fn push_drop_removed() {
|
||||||
// Optimizer tests will be added in Step 11
|
assert_eq!(opt(vec![IrOp::PushI32(5), IrOp::Drop]), vec![]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn dup_drop_removed() {
|
||||||
|
assert_eq!(
|
||||||
|
opt(vec![IrOp::PushI32(1), IrOp::Dup, IrOp::Drop]),
|
||||||
|
vec![IrOp::PushI32(1)]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn swap_swap_removed() {
|
||||||
|
assert_eq!(opt(vec![IrOp::Swap, IrOp::Swap]), vec![]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn swap_drop_to_nip() {
|
||||||
|
assert_eq!(opt(vec![IrOp::Swap, IrOp::Drop]), vec![IrOp::Nip]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn add_zero_identity() {
|
||||||
|
assert_eq!(opt(vec![IrOp::PushI32(0), IrOp::Add]), vec![]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Constant folding tests
|
||||||
|
#[test]
|
||||||
|
fn fold_add() {
|
||||||
|
assert_eq!(
|
||||||
|
opt(vec![IrOp::PushI32(5), IrOp::PushI32(3), IrOp::Add]),
|
||||||
|
vec![IrOp::PushI32(8)]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn fold_negate() {
|
||||||
|
assert_eq!(
|
||||||
|
opt(vec![IrOp::PushI32(7), IrOp::Negate]),
|
||||||
|
vec![IrOp::PushI32(-7)]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn fold_chain() {
|
||||||
|
// 2 3 + 4 * => 5 4 * => 20
|
||||||
|
assert_eq!(
|
||||||
|
opt(vec![
|
||||||
|
IrOp::PushI32(2),
|
||||||
|
IrOp::PushI32(3),
|
||||||
|
IrOp::Add,
|
||||||
|
IrOp::PushI32(4),
|
||||||
|
IrOp::Mul,
|
||||||
|
]),
|
||||||
|
vec![IrOp::PushI32(20)]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn fold_comparison() {
|
||||||
|
assert_eq!(
|
||||||
|
opt(vec![IrOp::PushI32(4), IrOp::PushI32(3), IrOp::Lt]),
|
||||||
|
vec![IrOp::PushI32(0)]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Strength reduction tests
|
||||||
|
#[test]
|
||||||
|
fn power_of_2_mul_to_shift() {
|
||||||
|
assert_eq!(
|
||||||
|
opt(vec![IrOp::PushI32(4), IrOp::Mul]),
|
||||||
|
vec![IrOp::PushI32(2), IrOp::Lshift]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn non_power_of_2_unchanged() {
|
||||||
|
assert_eq!(
|
||||||
|
opt(vec![IrOp::PushI32(3), IrOp::Mul]),
|
||||||
|
vec![IrOp::PushI32(3), IrOp::Mul]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tail call tests
|
||||||
|
#[test]
|
||||||
|
fn tail_call_simple() {
|
||||||
|
assert_eq!(
|
||||||
|
opt(vec![IrOp::PushI32(5), IrOp::Call(WordId(3))]),
|
||||||
|
vec![IrOp::PushI32(5), IrOp::TailCall(WordId(3))]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn no_tail_call_with_unbalanced_rstack() {
|
||||||
|
assert_eq!(
|
||||||
|
opt(vec![IrOp::ToR, IrOp::Call(WordId(3))]),
|
||||||
|
vec![IrOp::ToR, IrOp::Call(WordId(3))]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// DCE tests
|
||||||
|
#[test]
|
||||||
|
fn remove_after_exit() {
|
||||||
|
assert_eq!(
|
||||||
|
opt(vec![IrOp::PushI32(1), IrOp::Exit, IrOp::PushI32(2)]),
|
||||||
|
vec![IrOp::PushI32(1), IrOp::Exit]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn constant_true_if() {
|
||||||
|
assert_eq!(
|
||||||
|
opt(vec![
|
||||||
|
IrOp::PushI32(1),
|
||||||
|
IrOp::If {
|
||||||
|
then_body: vec![IrOp::PushI32(10)],
|
||||||
|
else_body: Some(vec![IrOp::PushI32(20)]),
|
||||||
|
}
|
||||||
|
]),
|
||||||
|
vec![IrOp::PushI32(10)]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn constant_false_if() {
|
||||||
|
assert_eq!(
|
||||||
|
opt(vec![
|
||||||
|
IrOp::PushI32(0),
|
||||||
|
IrOp::If {
|
||||||
|
then_body: vec![IrOp::PushI32(10)],
|
||||||
|
else_body: Some(vec![IrOp::PushI32(20)]),
|
||||||
|
}
|
||||||
|
]),
|
||||||
|
vec![IrOp::PushI32(20)]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compound ops tests
|
||||||
|
#[test]
|
||||||
|
fn over_over_to_twdup() {
|
||||||
|
assert_eq!(opt(vec![IrOp::Over, IrOp::Over]), vec![IrOp::TwoDup]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn drop_drop_to_twodrop() {
|
||||||
|
assert_eq!(opt(vec![IrOp::Drop, IrOp::Drop]), vec![IrOp::TwoDrop]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Nested optimization
|
||||||
|
#[test]
|
||||||
|
fn nested_if_optimized() {
|
||||||
|
assert_eq!(
|
||||||
|
opt(vec![IrOp::If {
|
||||||
|
then_body: vec![IrOp::PushI32(5), IrOp::Drop],
|
||||||
|
else_body: None,
|
||||||
|
}]),
|
||||||
|
vec![IrOp::If {
|
||||||
|
then_body: vec![],
|
||||||
|
else_body: None
|
||||||
|
}]
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ use crate::memory::{
|
|||||||
INPUT_BUFFER_SIZE, RETURN_STACK_TOP, SYSVAR_BASE_VAR, SYSVAR_NUM_TIB, SYSVAR_STATE,
|
INPUT_BUFFER_SIZE, RETURN_STACK_TOP, SYSVAR_BASE_VAR, SYSVAR_NUM_TIB, SYSVAR_STATE,
|
||||||
SYSVAR_TO_IN,
|
SYSVAR_TO_IN,
|
||||||
};
|
};
|
||||||
|
use crate::optimizer::{OptConfig, optimize};
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Control-flow compilation state
|
// Control-flow compilation state
|
||||||
@@ -234,7 +235,11 @@ pub struct ForthVM {
|
|||||||
impl ForthVM {
|
impl ForthVM {
|
||||||
/// Boot a new Forth VM with all primitives registered.
|
/// Boot a new Forth VM with all primitives registered.
|
||||||
pub fn new() -> anyhow::Result<Self> {
|
pub fn new() -> anyhow::Result<Self> {
|
||||||
let engine = Engine::default();
|
let mut config = wasmtime::Config::new();
|
||||||
|
config.cranelift_nan_canonicalization(false);
|
||||||
|
// Best-effort module caching
|
||||||
|
let _ = config.cache_config_load_default();
|
||||||
|
let engine = Engine::new(&config)?;
|
||||||
let output = Arc::new(Mutex::new(String::new()));
|
let output = Arc::new(Mutex::new(String::new()));
|
||||||
|
|
||||||
let host = VmHost {
|
let host = VmHost {
|
||||||
@@ -1421,6 +1426,18 @@ impl ForthVM {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Run all enabled optimization passes on an IR sequence.
|
||||||
|
fn optimize_ir(ir: Vec<IrOp>) -> Vec<IrOp> {
|
||||||
|
let config = OptConfig {
|
||||||
|
peephole: true,
|
||||||
|
constant_fold: true,
|
||||||
|
tail_call: true,
|
||||||
|
strength_reduce: true,
|
||||||
|
dce: true,
|
||||||
|
};
|
||||||
|
optimize(ir, &config)
|
||||||
|
}
|
||||||
|
|
||||||
fn finish_colon_def(&mut self) -> anyhow::Result<()> {
|
fn finish_colon_def(&mut self) -> anyhow::Result<()> {
|
||||||
if self.state == 0 {
|
if self.state == 0 {
|
||||||
anyhow::bail!("not in compile mode");
|
anyhow::bail!("not in compile mode");
|
||||||
@@ -1438,6 +1455,7 @@ impl ForthVM {
|
|||||||
.take()
|
.take()
|
||||||
.ok_or_else(|| anyhow::anyhow!("no word being compiled"))?;
|
.ok_or_else(|| anyhow::anyhow!("no word being compiled"))?;
|
||||||
let ir = std::mem::take(&mut self.compiling_ir);
|
let ir = std::mem::take(&mut self.compiling_ir);
|
||||||
|
let ir = Self::optimize_ir(ir);
|
||||||
|
|
||||||
// Compile to WASM
|
// Compile to WASM
|
||||||
let config = CodegenConfig {
|
let config = CodegenConfig {
|
||||||
@@ -1753,6 +1771,7 @@ impl ForthVM {
|
|||||||
immediate: bool,
|
immediate: bool,
|
||||||
ir_body: Vec<IrOp>,
|
ir_body: Vec<IrOp>,
|
||||||
) -> anyhow::Result<WordId> {
|
) -> anyhow::Result<WordId> {
|
||||||
|
let ir_body = Self::optimize_ir(ir_body);
|
||||||
let word_id = self
|
let word_id = self
|
||||||
.dictionary
|
.dictionary
|
||||||
.create(name, immediate)
|
.create(name, immediate)
|
||||||
@@ -10070,4 +10089,18 @@ mod tests {
|
|||||||
assert_eq!(eval_stack("1E 1.5E 1E F~"), vec![-1]); // |1-1.5| < 1
|
assert_eq!(eval_stack("1E 1.5E 1E F~"), vec![-1]); // |1-1.5| < 1
|
||||||
assert_eq!(eval_stack("1E 2.5E 1E F~"), vec![0]); // |1-2.5| = 1.5 >= 1
|
assert_eq!(eval_stack("1E 2.5E 1E F~"), vec![0]); // |1-2.5| = 1.5 >= 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn optimizer_doesnt_break_basic_arithmetic() {
    // Forth source paired with the single value it must leave on the stack.
    let cases = [
        ("5 3 +", 8),
        ("10 3 -", 7),
        (": SQUARE DUP * ; 7 SQUARE", 49),
    ];
    for (source, expected) in cases {
        assert_eq!(eval_stack(source), vec![expected]);
    }
}
|
||||||
|
|
||||||
|
#[test]
fn optimizer_doesnt_break_control_flow() {
    // Forth source paired with the single value it must leave on the stack.
    // T1 and T2 use a literal condition; SUM runs a DO loop.
    let cases = [
        (": T1 1 IF 42 ELSE 0 THEN ; T1", 42),
        (": T2 0 IF 42 ELSE 0 THEN ; T2", 0),
        (": SUM 0 SWAP 0 DO I + LOOP ; 10 SUM", 45),
    ];
    for (source, expected) in cases {
        assert_eq!(eval_stack(source), vec![expected]);
    }
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user