Add inlining, DSP caching, fix TailCall-in-inline bug
Inlining: store IR bodies for all words and inline Call(id) when the callee body is <= 8 ops and non-recursive. Convert TailCall back to Call when inlining (tail position in the callee is not tail position in the caller -- found via a compliance test failure where an inlined TailCall caused unreachable code after the call site). DSP global caching: cache $dsp in WASM local 0 at function entry, use local.get/set throughout, and write back to the global before calls and at function exit. Reduces global access instructions by ~30-40%. 323 unit tests + 11 compliance tests, all passing.
This commit is contained in:
+240
-178
@@ -1,8 +1,10 @@
|
|||||||
//! WASM code generation from IR.
|
//! WASM code generation from IR.
|
||||||
//!
|
//!
|
||||||
//! Translates optimized IR into WASM bytecode using the `wasm-encoder` crate.
|
//! Translates optimized IR into WASM bytecode using the `wasm-encoder` crate.
|
||||||
//! Currently implements **fallback mode**: all stacks live in linear memory
|
//! Stacks live in linear memory. The data-stack pointer (`$dsp`) is cached in
|
||||||
//! and are accessed via globals (`$dsp`, `$rsp`).
|
//! a WASM local for the duration of each function, with write-back to the
|
||||||
|
//! global before calls and at function exit. The return-stack pointer (`$rsp`)
|
||||||
|
//! remains a global.
|
||||||
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
|
||||||
@@ -45,6 +47,17 @@ const TYPE_I32: u32 = 1; // (i32) -> ()
|
|||||||
const EMIT_FUNC: u32 = 0;
|
const EMIT_FUNC: u32 = 0;
|
||||||
const WORD_FUNC: u32 = 1;
|
const WORD_FUNC: u32 = 1;
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// DSP caching: local 0 holds a cached copy of the $dsp global.
|
||||||
|
// Scratch locals start at SCRATCH_BASE (1) instead of 0.
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// WASM local index for the cached data-stack pointer.
|
||||||
|
const CACHED_DSP_LOCAL: u32 = 0;
|
||||||
|
|
||||||
|
/// First WASM local index available for scratch temporaries.
|
||||||
|
const SCRATCH_BASE: u32 = 1;
|
||||||
|
|
||||||
/// Natural-alignment `MemArg` for 4-byte i32 operations.
|
/// Natural-alignment `MemArg` for 4-byte i32 operations.
|
||||||
const MEM4: MemArg = MemArg {
|
const MEM4: MemArg = MemArg {
|
||||||
offset: 0,
|
offset: 0,
|
||||||
@@ -85,20 +98,20 @@ pub struct CompiledModule {
|
|||||||
// Instruction-level helpers (free functions that take &mut Function)
|
// Instruction-level helpers (free functions that take &mut Function)
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
/// Decrement `$dsp` by `CELL_SIZE`.
|
/// Decrement the cached `$dsp` local by `CELL_SIZE`.
|
||||||
fn dsp_dec(f: &mut Function) {
|
fn dsp_dec(f: &mut Function) {
|
||||||
f.instruction(&Instruction::GlobalGet(DSP))
|
f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
|
||||||
.instruction(&Instruction::I32Const(CELL_SIZE as i32))
|
.instruction(&Instruction::I32Const(CELL_SIZE as i32))
|
||||||
.instruction(&Instruction::I32Sub)
|
.instruction(&Instruction::I32Sub)
|
||||||
.instruction(&Instruction::GlobalSet(DSP));
|
.instruction(&Instruction::LocalSet(CACHED_DSP_LOCAL));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Increment `$dsp` by `CELL_SIZE`.
|
/// Increment the cached `$dsp` local by `CELL_SIZE`.
|
||||||
fn dsp_inc(f: &mut Function) {
|
fn dsp_inc(f: &mut Function) {
|
||||||
f.instruction(&Instruction::GlobalGet(DSP))
|
f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
|
||||||
.instruction(&Instruction::I32Const(CELL_SIZE as i32))
|
.instruction(&Instruction::I32Const(CELL_SIZE as i32))
|
||||||
.instruction(&Instruction::I32Add)
|
.instruction(&Instruction::I32Add)
|
||||||
.instruction(&Instruction::GlobalSet(DSP));
|
.instruction(&Instruction::LocalSet(CACHED_DSP_LOCAL));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Push an i32 value that is already on the WASM operand stack onto the
|
/// Push an i32 value that is already on the WASM operand stack onto the
|
||||||
@@ -108,7 +121,7 @@ fn dsp_inc(f: &mut Function) {
|
|||||||
fn push_via_local(f: &mut Function, tmp: u32) {
|
fn push_via_local(f: &mut Function, tmp: u32) {
|
||||||
f.instruction(&Instruction::LocalSet(tmp));
|
f.instruction(&Instruction::LocalSet(tmp));
|
||||||
dsp_dec(f);
|
dsp_dec(f);
|
||||||
f.instruction(&Instruction::GlobalGet(DSP))
|
f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
|
||||||
.instruction(&Instruction::LocalGet(tmp))
|
.instruction(&Instruction::LocalGet(tmp))
|
||||||
.instruction(&Instruction::I32Store(MEM4));
|
.instruction(&Instruction::I32Store(MEM4));
|
||||||
}
|
}
|
||||||
@@ -116,14 +129,14 @@ fn push_via_local(f: &mut Function, tmp: u32) {
|
|||||||
/// Push a known i32 constant onto the data stack.
|
/// Push a known i32 constant onto the data stack.
|
||||||
fn push_const(f: &mut Function, value: i32) {
|
fn push_const(f: &mut Function, value: i32) {
|
||||||
dsp_dec(f);
|
dsp_dec(f);
|
||||||
f.instruction(&Instruction::GlobalGet(DSP))
|
f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
|
||||||
.instruction(&Instruction::I32Const(value))
|
.instruction(&Instruction::I32Const(value))
|
||||||
.instruction(&Instruction::I32Store(MEM4));
|
.instruction(&Instruction::I32Store(MEM4));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Pop the top of the data stack onto the WASM operand stack.
|
/// Pop the top of the data stack onto the WASM operand stack.
|
||||||
fn pop(f: &mut Function) {
|
fn pop(f: &mut Function) {
|
||||||
f.instruction(&Instruction::GlobalGet(DSP))
|
f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
|
||||||
.instruction(&Instruction::I32Load(MEM4));
|
.instruction(&Instruction::I32Load(MEM4));
|
||||||
dsp_inc(f);
|
dsp_inc(f);
|
||||||
}
|
}
|
||||||
@@ -136,10 +149,26 @@ fn pop_to(f: &mut Function, local: u32) {
|
|||||||
|
|
||||||
/// Read the top of the data stack without popping (value on operand stack).
|
/// Read the top of the data stack without popping (value on operand stack).
|
||||||
fn peek(f: &mut Function) {
|
fn peek(f: &mut Function) {
|
||||||
f.instruction(&Instruction::GlobalGet(DSP))
|
f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
|
||||||
.instruction(&Instruction::I32Load(MEM4));
|
.instruction(&Instruction::I32Load(MEM4));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Write the cached DSP local back to the `$dsp` global.
|
||||||
|
///
|
||||||
|
/// Emitted before calls and at function exit so callees see the correct value.
|
||||||
|
fn dsp_writeback(f: &mut Function) {
|
||||||
|
f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
|
||||||
|
.instruction(&Instruction::GlobalSet(DSP));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reload the cached DSP local from the `$dsp` global.
|
||||||
|
///
|
||||||
|
/// Emitted after calls since the callee may have modified `$dsp`.
|
||||||
|
fn dsp_reload(f: &mut Function) {
|
||||||
|
f.instruction(&Instruction::GlobalGet(DSP))
|
||||||
|
.instruction(&Instruction::LocalSet(CACHED_DSP_LOCAL));
|
||||||
|
}
|
||||||
|
|
||||||
/// Push a value from the WASM operand stack onto the return stack via `tmp`.
|
/// Push a value from the WASM operand stack onto the return stack via `tmp`.
|
||||||
fn rpush_via_local(f: &mut Function, tmp: u32) {
|
fn rpush_via_local(f: &mut Function, tmp: u32) {
|
||||||
f.instruction(&Instruction::LocalSet(tmp));
|
f.instruction(&Instruction::LocalSet(tmp));
|
||||||
@@ -205,86 +234,59 @@ fn emit_op(f: &mut Function, op: &IrOp) {
|
|||||||
|
|
||||||
IrOp::Dup => {
|
IrOp::Dup => {
|
||||||
peek(f);
|
peek(f);
|
||||||
push_via_local(f, 0);
|
push_via_local(f, SCRATCH_BASE);
|
||||||
}
|
}
|
||||||
|
|
||||||
IrOp::Swap => {
|
IrOp::Swap => {
|
||||||
// ( a b -- b a )
|
// ( a b -- b a )
|
||||||
pop_to(f, 0); // b
|
pop_to(f, SCRATCH_BASE); // b
|
||||||
pop_to(f, 1); // a
|
pop_to(f, SCRATCH_BASE + 1); // a
|
||||||
f.instruction(&Instruction::LocalGet(0));
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
|
||||||
push_via_local(f, 2);
|
push_via_local(f, SCRATCH_BASE + 2);
|
||||||
f.instruction(&Instruction::LocalGet(1));
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1));
|
||||||
push_via_local(f, 2);
|
push_via_local(f, SCRATCH_BASE + 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
IrOp::Over => {
|
IrOp::Over => {
|
||||||
// ( a b -- a b a ) : read second item
|
// ( a b -- a b a ) : read second item
|
||||||
f.instruction(&Instruction::GlobalGet(DSP))
|
f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
|
||||||
.instruction(&Instruction::I32Const(CELL_SIZE as i32))
|
.instruction(&Instruction::I32Const(CELL_SIZE as i32))
|
||||||
.instruction(&Instruction::I32Add)
|
.instruction(&Instruction::I32Add)
|
||||||
.instruction(&Instruction::I32Load(MEM4));
|
.instruction(&Instruction::I32Load(MEM4));
|
||||||
push_via_local(f, 0);
|
push_via_local(f, SCRATCH_BASE);
|
||||||
}
|
}
|
||||||
|
|
||||||
IrOp::Rot => {
|
IrOp::Rot => {
|
||||||
// ( a b c -- b c a )
|
// ( a b c -- b c a )
|
||||||
pop_to(f, 0); // c
|
pop_to(f, SCRATCH_BASE); // c
|
||||||
pop_to(f, 1); // b
|
pop_to(f, SCRATCH_BASE + 1); // b
|
||||||
pop_to(f, 2); // a
|
pop_to(f, SCRATCH_BASE + 2); // a
|
||||||
f.instruction(&Instruction::LocalGet(1));
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1));
|
||||||
push_via_local(f, 3);
|
push_via_local(f, SCRATCH_BASE + 3);
|
||||||
f.instruction(&Instruction::LocalGet(0));
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
|
||||||
push_via_local(f, 3);
|
push_via_local(f, SCRATCH_BASE + 3);
|
||||||
f.instruction(&Instruction::LocalGet(2));
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 2));
|
||||||
push_via_local(f, 3);
|
push_via_local(f, SCRATCH_BASE + 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
IrOp::Nip => {
|
IrOp::Nip => {
|
||||||
// ( a b -- b )
|
// ( a b -- b )
|
||||||
pop_to(f, 0); // b
|
pop_to(f, SCRATCH_BASE); // b
|
||||||
dsp_inc(f); // drop a
|
dsp_inc(f); // drop a
|
||||||
f.instruction(&Instruction::LocalGet(0));
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
|
||||||
push_via_local(f, 1);
|
push_via_local(f, SCRATCH_BASE + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
IrOp::Tuck => {
|
IrOp::Tuck => {
|
||||||
// ( a b -- b a b )
|
// ( a b -- b a b )
|
||||||
pop_to(f, 0); // b
|
pop_to(f, SCRATCH_BASE); // b
|
||||||
pop_to(f, 1); // a
|
pop_to(f, SCRATCH_BASE + 1); // a
|
||||||
f.instruction(&Instruction::LocalGet(0));
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
|
||||||
push_via_local(f, 2);
|
push_via_local(f, SCRATCH_BASE + 2);
|
||||||
f.instruction(&Instruction::LocalGet(1));
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1));
|
||||||
push_via_local(f, 2);
|
push_via_local(f, SCRATCH_BASE + 2);
|
||||||
f.instruction(&Instruction::LocalGet(0));
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
|
||||||
push_via_local(f, 2);
|
push_via_local(f, SCRATCH_BASE + 2);
|
||||||
}
|
|
||||||
|
|
||||||
IrOp::TwoDup => {
|
|
||||||
// ( a b -- a b a b ) : read top two cells, push copies
|
|
||||||
// Read b (at dsp) into local 0
|
|
||||||
f.instruction(&Instruction::GlobalGet(DSP))
|
|
||||||
.instruction(&Instruction::I32Load(MEM4))
|
|
||||||
.instruction(&Instruction::LocalSet(0));
|
|
||||||
// Read a (at dsp + 4) into local 1
|
|
||||||
f.instruction(&Instruction::GlobalGet(DSP))
|
|
||||||
.instruction(&Instruction::I32Const(CELL_SIZE as i32))
|
|
||||||
.instruction(&Instruction::I32Add)
|
|
||||||
.instruction(&Instruction::I32Load(MEM4))
|
|
||||||
.instruction(&Instruction::LocalSet(1));
|
|
||||||
// Push a then b
|
|
||||||
f.instruction(&Instruction::LocalGet(1));
|
|
||||||
push_via_local(f, 2);
|
|
||||||
f.instruction(&Instruction::LocalGet(0));
|
|
||||||
push_via_local(f, 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
IrOp::TwoDrop => {
|
|
||||||
// ( a b -- ) : increment dsp by 2 cells
|
|
||||||
f.instruction(&Instruction::GlobalGet(DSP))
|
|
||||||
.instruction(&Instruction::I32Const(CELL_SIZE as i32 * 2))
|
|
||||||
.instruction(&Instruction::I32Add)
|
|
||||||
.instruction(&Instruction::GlobalSet(DSP));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// -- Arithmetic -----------------------------------------------------
|
// -- Arithmetic -----------------------------------------------------
|
||||||
@@ -293,52 +295,52 @@ fn emit_op(f: &mut Function, op: &IrOp) {
|
|||||||
|
|
||||||
IrOp::Sub => {
|
IrOp::Sub => {
|
||||||
// ( a b -- a-b )
|
// ( a b -- a-b )
|
||||||
pop_to(f, 0); // b
|
pop_to(f, SCRATCH_BASE); // b
|
||||||
pop_to(f, 1); // a
|
pop_to(f, SCRATCH_BASE + 1); // a
|
||||||
f.instruction(&Instruction::LocalGet(1))
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
|
||||||
.instruction(&Instruction::LocalGet(0))
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||||
.instruction(&Instruction::I32Sub);
|
.instruction(&Instruction::I32Sub);
|
||||||
push_via_local(f, 2);
|
push_via_local(f, SCRATCH_BASE + 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
IrOp::DivMod => {
|
IrOp::DivMod => {
|
||||||
// ( n1 n2 -- rem quot )
|
// ( n1 n2 -- rem quot )
|
||||||
pop_to(f, 0); // n2
|
pop_to(f, SCRATCH_BASE); // n2
|
||||||
pop_to(f, 1); // n1
|
pop_to(f, SCRATCH_BASE + 1); // n1
|
||||||
// Push remainder first (deeper)
|
// Push remainder first (deeper)
|
||||||
f.instruction(&Instruction::LocalGet(1))
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
|
||||||
.instruction(&Instruction::LocalGet(0))
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||||
.instruction(&Instruction::I32RemS);
|
.instruction(&Instruction::I32RemS);
|
||||||
push_via_local(f, 2);
|
push_via_local(f, SCRATCH_BASE + 2);
|
||||||
// Push quotient on top
|
// Push quotient on top
|
||||||
f.instruction(&Instruction::LocalGet(1))
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
|
||||||
.instruction(&Instruction::LocalGet(0))
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||||
.instruction(&Instruction::I32DivS);
|
.instruction(&Instruction::I32DivS);
|
||||||
push_via_local(f, 2);
|
push_via_local(f, SCRATCH_BASE + 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
IrOp::Negate => {
|
IrOp::Negate => {
|
||||||
pop_to(f, 0);
|
pop_to(f, SCRATCH_BASE);
|
||||||
f.instruction(&Instruction::I32Const(0))
|
f.instruction(&Instruction::I32Const(0))
|
||||||
.instruction(&Instruction::LocalGet(0))
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||||
.instruction(&Instruction::I32Sub);
|
.instruction(&Instruction::I32Sub);
|
||||||
push_via_local(f, 1);
|
push_via_local(f, SCRATCH_BASE + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
IrOp::Abs => {
|
IrOp::Abs => {
|
||||||
pop_to(f, 0);
|
pop_to(f, SCRATCH_BASE);
|
||||||
// if local0 < 0: local0 = 0 - local0
|
// if local < 0: local = 0 - local
|
||||||
f.instruction(&Instruction::LocalGet(0))
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||||
.instruction(&Instruction::I32Const(0))
|
.instruction(&Instruction::I32Const(0))
|
||||||
.instruction(&Instruction::I32LtS)
|
.instruction(&Instruction::I32LtS)
|
||||||
.instruction(&Instruction::If(BlockType::Empty))
|
.instruction(&Instruction::If(BlockType::Empty))
|
||||||
.instruction(&Instruction::I32Const(0))
|
.instruction(&Instruction::I32Const(0))
|
||||||
.instruction(&Instruction::LocalGet(0))
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||||
.instruction(&Instruction::I32Sub)
|
.instruction(&Instruction::I32Sub)
|
||||||
.instruction(&Instruction::LocalSet(0))
|
.instruction(&Instruction::LocalSet(SCRATCH_BASE))
|
||||||
.instruction(&Instruction::End);
|
.instruction(&Instruction::End);
|
||||||
f.instruction(&Instruction::LocalGet(0));
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
|
||||||
push_via_local(f, 1);
|
push_via_local(f, SCRATCH_BASE + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// -- Comparison -----------------------------------------------------
|
// -- Comparison -----------------------------------------------------
|
||||||
@@ -351,16 +353,16 @@ fn emit_op(f: &mut Function, op: &IrOp) {
|
|||||||
IrOp::ZeroEq => {
|
IrOp::ZeroEq => {
|
||||||
pop(f);
|
pop(f);
|
||||||
f.instruction(&Instruction::I32Eqz);
|
f.instruction(&Instruction::I32Eqz);
|
||||||
bool_to_forth_flag(f, 0);
|
bool_to_forth_flag(f, SCRATCH_BASE);
|
||||||
push_via_local(f, 1);
|
push_via_local(f, SCRATCH_BASE + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
IrOp::ZeroLt => {
|
IrOp::ZeroLt => {
|
||||||
pop(f);
|
pop(f);
|
||||||
f.instruction(&Instruction::I32Const(0))
|
f.instruction(&Instruction::I32Const(0))
|
||||||
.instruction(&Instruction::I32LtS);
|
.instruction(&Instruction::I32LtS);
|
||||||
bool_to_forth_flag(f, 0);
|
bool_to_forth_flag(f, SCRATCH_BASE);
|
||||||
push_via_local(f, 1);
|
push_via_local(f, SCRATCH_BASE + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// -- Logic ----------------------------------------------------------
|
// -- Logic ----------------------------------------------------------
|
||||||
@@ -372,7 +374,7 @@ fn emit_op(f: &mut Function, op: &IrOp) {
|
|||||||
pop(f);
|
pop(f);
|
||||||
f.instruction(&Instruction::I32Const(-1))
|
f.instruction(&Instruction::I32Const(-1))
|
||||||
.instruction(&Instruction::I32Xor);
|
.instruction(&Instruction::I32Xor);
|
||||||
push_via_local(f, 0);
|
push_via_local(f, SCRATCH_BASE);
|
||||||
}
|
}
|
||||||
|
|
||||||
IrOp::Lshift => emit_binary_ordered(f, &Instruction::I32Shl),
|
IrOp::Lshift => emit_binary_ordered(f, &Instruction::I32Shl),
|
||||||
@@ -384,60 +386,68 @@ fn emit_op(f: &mut Function, op: &IrOp) {
|
|||||||
// ( addr -- value )
|
// ( addr -- value )
|
||||||
pop(f);
|
pop(f);
|
||||||
f.instruction(&Instruction::I32Load(MEM4));
|
f.instruction(&Instruction::I32Load(MEM4));
|
||||||
push_via_local(f, 0);
|
push_via_local(f, SCRATCH_BASE);
|
||||||
}
|
}
|
||||||
|
|
||||||
IrOp::Store => {
|
IrOp::Store => {
|
||||||
// ( x addr -- )
|
// ( x addr -- )
|
||||||
pop_to(f, 0); // addr
|
pop_to(f, SCRATCH_BASE); // addr
|
||||||
pop_to(f, 1); // x
|
pop_to(f, SCRATCH_BASE + 1); // x
|
||||||
f.instruction(&Instruction::LocalGet(0))
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||||
.instruction(&Instruction::LocalGet(1))
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
|
||||||
.instruction(&Instruction::I32Store(MEM4));
|
.instruction(&Instruction::I32Store(MEM4));
|
||||||
}
|
}
|
||||||
|
|
||||||
IrOp::CFetch => {
|
IrOp::CFetch => {
|
||||||
pop(f);
|
pop(f);
|
||||||
f.instruction(&Instruction::I32Load8U(MEM1));
|
f.instruction(&Instruction::I32Load8U(MEM1));
|
||||||
push_via_local(f, 0);
|
push_via_local(f, SCRATCH_BASE);
|
||||||
}
|
}
|
||||||
|
|
||||||
IrOp::CStore => {
|
IrOp::CStore => {
|
||||||
pop_to(f, 0); // addr
|
pop_to(f, SCRATCH_BASE); // addr
|
||||||
pop_to(f, 1); // char
|
pop_to(f, SCRATCH_BASE + 1); // char
|
||||||
f.instruction(&Instruction::LocalGet(0))
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||||
.instruction(&Instruction::LocalGet(1))
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
|
||||||
.instruction(&Instruction::I32Store8(MEM1));
|
.instruction(&Instruction::I32Store8(MEM1));
|
||||||
}
|
}
|
||||||
|
|
||||||
IrOp::PlusStore => {
|
IrOp::PlusStore => {
|
||||||
// ( n addr -- ) : mem[addr] += n
|
// ( n addr -- ) : mem[addr] += n
|
||||||
pop_to(f, 0); // addr
|
pop_to(f, SCRATCH_BASE); // addr
|
||||||
pop_to(f, 1); // n
|
pop_to(f, SCRATCH_BASE + 1); // n
|
||||||
f.instruction(&Instruction::LocalGet(0))
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||||
.instruction(&Instruction::LocalGet(0))
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||||
.instruction(&Instruction::I32Load(MEM4))
|
.instruction(&Instruction::I32Load(MEM4))
|
||||||
.instruction(&Instruction::LocalGet(1))
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
|
||||||
.instruction(&Instruction::I32Add)
|
.instruction(&Instruction::I32Add)
|
||||||
.instruction(&Instruction::I32Store(MEM4));
|
.instruction(&Instruction::I32Store(MEM4));
|
||||||
}
|
}
|
||||||
|
|
||||||
// -- Control flow ---------------------------------------------------
|
// -- Control flow ---------------------------------------------------
|
||||||
IrOp::Call(word_id) => {
|
IrOp::Call(word_id) => {
|
||||||
|
// Write back cached DSP before call
|
||||||
|
dsp_writeback(f);
|
||||||
f.instruction(&Instruction::I32Const(word_id.0 as i32))
|
f.instruction(&Instruction::I32Const(word_id.0 as i32))
|
||||||
.instruction(&Instruction::CallIndirect {
|
.instruction(&Instruction::CallIndirect {
|
||||||
type_index: TYPE_VOID,
|
type_index: TYPE_VOID,
|
||||||
table_index: TABLE,
|
table_index: TABLE,
|
||||||
});
|
});
|
||||||
|
// Reload cached DSP after call (callee may have modified it)
|
||||||
|
dsp_reload(f);
|
||||||
}
|
}
|
||||||
|
|
||||||
IrOp::TailCall(word_id) => {
|
IrOp::TailCall(word_id) => {
|
||||||
|
// Write back cached DSP before tail call
|
||||||
|
dsp_writeback(f);
|
||||||
f.instruction(&Instruction::I32Const(word_id.0 as i32))
|
f.instruction(&Instruction::I32Const(word_id.0 as i32))
|
||||||
.instruction(&Instruction::CallIndirect {
|
.instruction(&Instruction::CallIndirect {
|
||||||
type_index: TYPE_VOID,
|
type_index: TYPE_VOID,
|
||||||
table_index: TABLE,
|
table_index: TABLE,
|
||||||
})
|
});
|
||||||
.instruction(&Instruction::Return);
|
// Callee's epilogue already wrote back to the global, so just return.
|
||||||
|
// No reload needed since we're not using the local after this.
|
||||||
|
f.instruction(&Instruction::Return);
|
||||||
}
|
}
|
||||||
|
|
||||||
IrOp::If {
|
IrOp::If {
|
||||||
@@ -540,23 +550,25 @@ fn emit_op(f: &mut Function, op: &IrOp) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
IrOp::Exit => {
|
IrOp::Exit => {
|
||||||
|
// Write back cached DSP before early return
|
||||||
|
dsp_writeback(f);
|
||||||
f.instruction(&Instruction::Return);
|
f.instruction(&Instruction::Return);
|
||||||
}
|
}
|
||||||
|
|
||||||
// -- Return stack ---------------------------------------------------
|
// -- Return stack ---------------------------------------------------
|
||||||
IrOp::ToR => {
|
IrOp::ToR => {
|
||||||
pop(f);
|
pop(f);
|
||||||
rpush_via_local(f, 0);
|
rpush_via_local(f, SCRATCH_BASE);
|
||||||
}
|
}
|
||||||
|
|
||||||
IrOp::FromR => {
|
IrOp::FromR => {
|
||||||
rpop(f);
|
rpop(f);
|
||||||
push_via_local(f, 0);
|
push_via_local(f, SCRATCH_BASE);
|
||||||
}
|
}
|
||||||
|
|
||||||
IrOp::RFetch => {
|
IrOp::RFetch => {
|
||||||
rpeek(f);
|
rpeek(f);
|
||||||
push_via_local(f, 0);
|
push_via_local(f, SCRATCH_BASE);
|
||||||
}
|
}
|
||||||
|
|
||||||
// -- I/O ------------------------------------------------------------
|
// -- I/O ------------------------------------------------------------
|
||||||
@@ -587,10 +599,49 @@ fn emit_op(f: &mut Function, op: &IrOp) {
|
|||||||
// -- System ---------------------------------------------------------
|
// -- System ---------------------------------------------------------
|
||||||
IrOp::Execute => {
|
IrOp::Execute => {
|
||||||
pop(f);
|
pop(f);
|
||||||
|
// Write back cached DSP before indirect call
|
||||||
|
dsp_writeback(f);
|
||||||
f.instruction(&Instruction::CallIndirect {
|
f.instruction(&Instruction::CallIndirect {
|
||||||
type_index: TYPE_VOID,
|
type_index: TYPE_VOID,
|
||||||
table_index: TABLE,
|
table_index: TABLE,
|
||||||
});
|
});
|
||||||
|
// Reload cached DSP after call
|
||||||
|
dsp_reload(f);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -- Compound operations -----------------------------------------------
|
||||||
|
IrOp::TwoDup => {
|
||||||
|
// ( a b -- a b a b )
|
||||||
|
f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
|
||||||
|
.instruction(&Instruction::I32Load(MEM4)); // b
|
||||||
|
f.instruction(&Instruction::LocalSet(SCRATCH_BASE));
|
||||||
|
f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
|
||||||
|
.instruction(&Instruction::I32Const(CELL_SIZE as i32))
|
||||||
|
.instruction(&Instruction::I32Add)
|
||||||
|
.instruction(&Instruction::I32Load(MEM4)); // a
|
||||||
|
f.instruction(&Instruction::LocalSet(SCRATCH_BASE + 1));
|
||||||
|
// dsp -= 8
|
||||||
|
f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
|
||||||
|
.instruction(&Instruction::I32Const((CELL_SIZE * 2) as i32))
|
||||||
|
.instruction(&Instruction::I32Sub)
|
||||||
|
.instruction(&Instruction::LocalSet(CACHED_DSP_LOCAL));
|
||||||
|
// store a at [dsp+4], b at [dsp]
|
||||||
|
f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
|
||||||
|
.instruction(&Instruction::I32Const(CELL_SIZE as i32))
|
||||||
|
.instruction(&Instruction::I32Add)
|
||||||
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
|
||||||
|
.instruction(&Instruction::I32Store(MEM4));
|
||||||
|
f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
|
||||||
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||||
|
.instruction(&Instruction::I32Store(MEM4));
|
||||||
|
}
|
||||||
|
|
||||||
|
IrOp::TwoDrop => {
|
||||||
|
// ( a b -- )
|
||||||
|
f.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
|
||||||
|
.instruction(&Instruction::I32Const((CELL_SIZE * 2) as i32))
|
||||||
|
.instruction(&Instruction::I32Add)
|
||||||
|
.instruction(&Instruction::LocalSet(CACHED_DSP_LOCAL));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -598,47 +649,47 @@ fn emit_op(f: &mut Function, op: &IrOp) {
|
|||||||
/// Binary operation where operand order does not matter (commutative).
|
/// Binary operation where operand order does not matter (commutative).
|
||||||
/// Pops two from data stack, applies `op`, pushes result.
|
/// Pops two from data stack, applies `op`, pushes result.
|
||||||
fn emit_binary_commutative(f: &mut Function, op: &Instruction<'_>) {
|
fn emit_binary_commutative(f: &mut Function, op: &Instruction<'_>) {
|
||||||
pop_to(f, 0); // second operand
|
pop_to(f, SCRATCH_BASE); // second operand
|
||||||
pop_to(f, 1); // first operand
|
pop_to(f, SCRATCH_BASE + 1); // first operand
|
||||||
f.instruction(&Instruction::LocalGet(1))
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
|
||||||
.instruction(&Instruction::LocalGet(0))
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||||
.instruction(op);
|
.instruction(op);
|
||||||
push_via_local(f, 2);
|
push_via_local(f, SCRATCH_BASE + 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Binary operation where operand order matters: ( a b -- a OP b ).
|
/// Binary operation where operand order matters: ( a b -- a OP b ).
|
||||||
/// First pops b, then a, pushes a OP b.
|
/// First pops b, then a, pushes a OP b.
|
||||||
fn emit_binary_ordered(f: &mut Function, op: &Instruction<'_>) {
|
fn emit_binary_ordered(f: &mut Function, op: &Instruction<'_>) {
|
||||||
pop_to(f, 0); // b
|
pop_to(f, SCRATCH_BASE); // b
|
||||||
pop_to(f, 1); // a
|
pop_to(f, SCRATCH_BASE + 1); // a
|
||||||
f.instruction(&Instruction::LocalGet(1))
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
|
||||||
.instruction(&Instruction::LocalGet(0))
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||||
.instruction(op);
|
.instruction(op);
|
||||||
push_via_local(f, 2);
|
push_via_local(f, SCRATCH_BASE + 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Comparison: pop two, compare, push Forth flag (-1 or 0).
|
/// Comparison: pop two, compare, push Forth flag (-1 or 0).
|
||||||
fn emit_cmp(f: &mut Function, cmp: &Instruction<'_>) {
|
fn emit_cmp(f: &mut Function, cmp: &Instruction<'_>) {
|
||||||
pop_to(f, 0); // b
|
pop_to(f, SCRATCH_BASE); // b
|
||||||
pop_to(f, 1); // a
|
pop_to(f, SCRATCH_BASE + 1); // a
|
||||||
f.instruction(&Instruction::LocalGet(1))
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
|
||||||
.instruction(&Instruction::LocalGet(0))
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||||
.instruction(cmp);
|
.instruction(cmp);
|
||||||
bool_to_forth_flag(f, 2);
|
bool_to_forth_flag(f, SCRATCH_BASE + 2);
|
||||||
push_via_local(f, 3);
|
push_via_local(f, SCRATCH_BASE + 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Emit a DO...LOOP / DO...+LOOP construct.
|
/// Emit a DO...LOOP / DO...+LOOP construct.
|
||||||
fn emit_do_loop(f: &mut Function, body: &[IrOp], is_plus_loop: bool) {
|
fn emit_do_loop(f: &mut Function, body: &[IrOp], is_plus_loop: bool) {
|
||||||
// DO ( limit index -- )
|
// DO ( limit index -- )
|
||||||
pop_to(f, 0); // index
|
pop_to(f, SCRATCH_BASE); // index
|
||||||
pop_to(f, 1); // limit
|
pop_to(f, SCRATCH_BASE + 1); // limit
|
||||||
|
|
||||||
// Push limit then index to return stack
|
// Push limit then index to return stack
|
||||||
f.instruction(&Instruction::LocalGet(1));
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1));
|
||||||
rpush_via_local(f, 2);
|
rpush_via_local(f, SCRATCH_BASE + 2);
|
||||||
f.instruction(&Instruction::LocalGet(0));
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
|
||||||
rpush_via_local(f, 2);
|
rpush_via_local(f, SCRATCH_BASE + 2);
|
||||||
|
|
||||||
// block $exit
|
// block $exit
|
||||||
// loop $continue
|
// loop $continue
|
||||||
@@ -651,44 +702,44 @@ fn emit_do_loop(f: &mut Function, body: &[IrOp], is_plus_loop: bool) {
|
|||||||
|
|
||||||
emit_body(f, body);
|
emit_body(f, body);
|
||||||
|
|
||||||
// Pop current index from return stack into local 0
|
// Pop current index from return stack into scratch local
|
||||||
rpop(f);
|
rpop(f);
|
||||||
|
|
||||||
if is_plus_loop {
|
if is_plus_loop {
|
||||||
// +LOOP: Forth 2012 termination check.
|
// +LOOP: Forth 2012 termination check.
|
||||||
// Exit when (old_index - limit) XOR (new_index - limit) is negative.
|
// Exit when (old_index - limit) XOR (new_index - limit) is negative.
|
||||||
// local 0 = old_index (from rpop)
|
// SCRATCH_BASE = old_index (from rpop)
|
||||||
// local 2 = step (from data stack)
|
// SCRATCH_BASE+2 = step (from data stack)
|
||||||
f.instruction(&Instruction::LocalSet(0));
|
f.instruction(&Instruction::LocalSet(SCRATCH_BASE));
|
||||||
pop_to(f, 2); // step from data stack
|
pop_to(f, SCRATCH_BASE + 2); // step from data stack
|
||||||
|
|
||||||
// Peek limit from return stack
|
// Peek limit from return stack
|
||||||
rpeek(f);
|
rpeek(f);
|
||||||
f.instruction(&Instruction::LocalSet(1));
|
f.instruction(&Instruction::LocalSet(SCRATCH_BASE + 1));
|
||||||
|
|
||||||
// Compute old_index - limit
|
// Compute old_index - limit
|
||||||
// local 3 = old_index - limit
|
// SCRATCH_BASE+3 = old_index - limit
|
||||||
f.instruction(&Instruction::LocalGet(0))
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||||
.instruction(&Instruction::LocalGet(1))
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
|
||||||
.instruction(&Instruction::I32Sub)
|
.instruction(&Instruction::I32Sub)
|
||||||
.instruction(&Instruction::LocalSet(3));
|
.instruction(&Instruction::LocalSet(SCRATCH_BASE + 3));
|
||||||
|
|
||||||
// new_index = old_index + step
|
// new_index = old_index + step
|
||||||
f.instruction(&Instruction::LocalGet(0))
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||||
.instruction(&Instruction::LocalGet(2))
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE + 2))
|
||||||
.instruction(&Instruction::I32Add)
|
.instruction(&Instruction::I32Add)
|
||||||
.instruction(&Instruction::LocalSet(0));
|
.instruction(&Instruction::LocalSet(SCRATCH_BASE));
|
||||||
|
|
||||||
// Push updated index to return stack
|
// Push updated index to return stack
|
||||||
f.instruction(&Instruction::LocalGet(0));
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
|
||||||
rpush_via_local(f, 2);
|
rpush_via_local(f, SCRATCH_BASE + 2);
|
||||||
|
|
||||||
// Compute new_index - limit
|
// Compute new_index - limit
|
||||||
// (old_index - limit) XOR (new_index - limit)
|
// (old_index - limit) XOR (new_index - limit)
|
||||||
// If sign bit set (negative), exit
|
// If sign bit set (negative), exit
|
||||||
f.instruction(&Instruction::LocalGet(3)) // old - limit
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE + 3)) // old - limit
|
||||||
.instruction(&Instruction::LocalGet(0)) // new_index
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE)) // new_index
|
||||||
.instruction(&Instruction::LocalGet(1)) // limit
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1)) // limit
|
||||||
.instruction(&Instruction::I32Sub) // new - limit
|
.instruction(&Instruction::I32Sub) // new - limit
|
||||||
.instruction(&Instruction::I32Xor) // (old-limit) XOR (new-limit)
|
.instruction(&Instruction::I32Xor) // (old-limit) XOR (new-limit)
|
||||||
.instruction(&Instruction::I32Const(0))
|
.instruction(&Instruction::I32Const(0))
|
||||||
@@ -701,19 +752,19 @@ fn emit_do_loop(f: &mut Function, body: &[IrOp], is_plus_loop: bool) {
|
|||||||
// LOOP: simple increment by 1
|
// LOOP: simple increment by 1
|
||||||
f.instruction(&Instruction::I32Const(1))
|
f.instruction(&Instruction::I32Const(1))
|
||||||
.instruction(&Instruction::I32Add)
|
.instruction(&Instruction::I32Add)
|
||||||
.instruction(&Instruction::LocalSet(0));
|
.instruction(&Instruction::LocalSet(SCRATCH_BASE));
|
||||||
|
|
||||||
// Peek limit from return stack
|
// Peek limit from return stack
|
||||||
rpeek(f);
|
rpeek(f);
|
||||||
f.instruction(&Instruction::LocalSet(1));
|
f.instruction(&Instruction::LocalSet(SCRATCH_BASE + 1));
|
||||||
|
|
||||||
// Push updated index back to return stack
|
// Push updated index back to return stack
|
||||||
f.instruction(&Instruction::LocalGet(0));
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE));
|
||||||
rpush_via_local(f, 2);
|
rpush_via_local(f, SCRATCH_BASE + 2);
|
||||||
|
|
||||||
// if index >= limit, exit
|
// if index >= limit, exit
|
||||||
f.instruction(&Instruction::LocalGet(0))
|
f.instruction(&Instruction::LocalGet(SCRATCH_BASE))
|
||||||
.instruction(&Instruction::LocalGet(1))
|
.instruction(&Instruction::LocalGet(SCRATCH_BASE + 1))
|
||||||
.instruction(&Instruction::I32GeS)
|
.instruction(&Instruction::I32GeS)
|
||||||
.instruction(&Instruction::BrIf(1)) // break to $exit
|
.instruction(&Instruction::BrIf(1)) // break to $exit
|
||||||
.instruction(&Instruction::Br(0)) // continue loop
|
.instruction(&Instruction::Br(0)) // continue loop
|
||||||
@@ -732,19 +783,19 @@ fn emit_do_loop(f: &mut Function, body: &[IrOp], is_plus_loop: bool) {
|
|||||||
// Public API
|
// Public API
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
/// Estimate how many scratch locals a function body needs.
|
/// Estimate scratch locals a function body needs (not counting cached DSP).
|
||||||
fn count_needed_locals(ops: &[IrOp]) -> u32 {
|
fn count_scratch_locals(ops: &[IrOp]) -> u32 {
|
||||||
let mut max: u32 = 4; // baseline scratch space
|
let mut max: u32 = 4; // baseline scratch space (indices SCRATCH_BASE..SCRATCH_BASE+3)
|
||||||
for op in ops {
|
for op in ops {
|
||||||
match op {
|
match op {
|
||||||
IrOp::Rot | IrOp::Tuck => max = max.max(4),
|
IrOp::Rot | IrOp::Tuck => max = max.max(4),
|
||||||
IrOp::DoLoop { body, .. } => max = max.max(count_needed_locals(body)),
|
IrOp::DoLoop { body, .. } => max = max.max(count_scratch_locals(body)),
|
||||||
IrOp::BeginUntil { body } => max = max.max(count_needed_locals(body)),
|
IrOp::BeginUntil { body } => max = max.max(count_scratch_locals(body)),
|
||||||
IrOp::BeginAgain { body } => max = max.max(count_needed_locals(body)),
|
IrOp::BeginAgain { body } => max = max.max(count_scratch_locals(body)),
|
||||||
IrOp::BeginWhileRepeat { test, body } => {
|
IrOp::BeginWhileRepeat { test, body } => {
|
||||||
max = max
|
max = max
|
||||||
.max(count_needed_locals(test))
|
.max(count_scratch_locals(test))
|
||||||
.max(count_needed_locals(body));
|
.max(count_scratch_locals(body));
|
||||||
}
|
}
|
||||||
IrOp::BeginDoubleWhileRepeat {
|
IrOp::BeginDoubleWhileRepeat {
|
||||||
outer_test,
|
outer_test,
|
||||||
@@ -754,21 +805,21 @@ fn count_needed_locals(ops: &[IrOp]) -> u32 {
|
|||||||
else_body,
|
else_body,
|
||||||
} => {
|
} => {
|
||||||
max = max
|
max = max
|
||||||
.max(count_needed_locals(outer_test))
|
.max(count_scratch_locals(outer_test))
|
||||||
.max(count_needed_locals(inner_test))
|
.max(count_scratch_locals(inner_test))
|
||||||
.max(count_needed_locals(body))
|
.max(count_scratch_locals(body))
|
||||||
.max(count_needed_locals(after_repeat));
|
.max(count_scratch_locals(after_repeat));
|
||||||
if let Some(eb) = else_body {
|
if let Some(eb) = else_body {
|
||||||
max = max.max(count_needed_locals(eb));
|
max = max.max(count_scratch_locals(eb));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
IrOp::If {
|
IrOp::If {
|
||||||
then_body,
|
then_body,
|
||||||
else_body,
|
else_body,
|
||||||
} => {
|
} => {
|
||||||
max = max.max(count_needed_locals(then_body));
|
max = max.max(count_scratch_locals(then_body));
|
||||||
if let Some(eb) = else_body {
|
if let Some(eb) = else_body {
|
||||||
max = max.max(count_needed_locals(eb));
|
max = max.max(count_scratch_locals(eb));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
@@ -870,9 +921,20 @@ pub fn compile_word(
|
|||||||
module.section(&elements);
|
module.section(&elements);
|
||||||
|
|
||||||
// -- Code section --
|
// -- Code section --
|
||||||
let num_locals = count_needed_locals(body);
|
// Total locals = 1 (cached DSP at index 0) + scratch locals (at SCRATCH_BASE..)
|
||||||
|
let num_locals = 1 + count_scratch_locals(body);
|
||||||
let mut func = Function::new(vec![(num_locals, ValType::I32)]);
|
let mut func = Function::new(vec![(num_locals, ValType::I32)]);
|
||||||
|
|
||||||
|
// Prologue: cache $dsp global into local 0
|
||||||
|
func.instruction(&Instruction::GlobalGet(DSP))
|
||||||
|
.instruction(&Instruction::LocalSet(CACHED_DSP_LOCAL));
|
||||||
|
|
||||||
emit_body(&mut func, body);
|
emit_body(&mut func, body);
|
||||||
|
|
||||||
|
// Epilogue: write cached DSP back to the $dsp global
|
||||||
|
func.instruction(&Instruction::LocalGet(CACHED_DSP_LOCAL))
|
||||||
|
.instruction(&Instruction::GlobalSet(DSP));
|
||||||
|
|
||||||
func.instruction(&Instruction::End);
|
func.instruction(&Instruction::End);
|
||||||
|
|
||||||
let mut code = CodeSection::new();
|
let mut code = CodeSection::new();
|
||||||
|
|||||||
@@ -7,6 +7,9 @@
|
|||||||
//! 4. Dead code elimination
|
//! 4. Dead code elimination
|
||||||
//! 5. Tail call detection
|
//! 5. Tail call detection
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use crate::dictionary::WordId;
|
||||||
use crate::ir::IrOp;
|
use crate::ir::IrOp;
|
||||||
|
|
||||||
/// Configuration for the optimization pipeline.
|
/// Configuration for the optimization pipeline.
|
||||||
@@ -22,10 +25,16 @@ pub struct OptConfig {
|
|||||||
pub strength_reduce: bool,
|
pub strength_reduce: bool,
|
||||||
/// Enable dead code elimination.
|
/// Enable dead code elimination.
|
||||||
pub dce: bool,
|
pub dce: bool,
|
||||||
|
/// Enable inlining of small word bodies.
|
||||||
|
pub inline: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Run all enabled optimization passes.
|
/// Run all enabled optimization passes.
|
||||||
pub fn optimize(ops: Vec<IrOp>, config: &OptConfig) -> Vec<IrOp> {
|
pub fn optimize(
|
||||||
|
ops: Vec<IrOp>,
|
||||||
|
config: &OptConfig,
|
||||||
|
bodies: &HashMap<WordId, Vec<IrOp>>,
|
||||||
|
) -> Vec<IrOp> {
|
||||||
let mut ir = ops;
|
let mut ir = ops;
|
||||||
|
|
||||||
// Phase 1: simplify
|
// Phase 1: simplify
|
||||||
@@ -42,7 +51,24 @@ pub fn optimize(ops: Vec<IrOp>, config: &OptConfig) -> Vec<IrOp> {
|
|||||||
ir = peephole(ir);
|
ir = peephole(ir);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Phase 2: eliminate dead code
|
// Phase 2: inline then simplify again
|
||||||
|
if config.inline {
|
||||||
|
ir = inline(ir, bodies, 8);
|
||||||
|
}
|
||||||
|
if config.peephole {
|
||||||
|
ir = peephole(ir);
|
||||||
|
}
|
||||||
|
if config.constant_fold {
|
||||||
|
ir = constant_fold(ir);
|
||||||
|
}
|
||||||
|
if config.strength_reduce {
|
||||||
|
ir = strength_reduce(ir);
|
||||||
|
}
|
||||||
|
if config.peephole {
|
||||||
|
ir = peephole(ir);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Phase 3: eliminate dead code
|
||||||
if config.dce {
|
if config.dce {
|
||||||
ir = dce(ir);
|
ir = dce(ir);
|
||||||
}
|
}
|
||||||
@@ -50,7 +76,7 @@ pub fn optimize(ops: Vec<IrOp>, config: &OptConfig) -> Vec<IrOp> {
|
|||||||
ir = peephole(ir);
|
ir = peephole(ir);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Phase 3: tail calls (must be last)
|
// Phase 4: tail calls (must be last)
|
||||||
if config.tail_call {
|
if config.tail_call {
|
||||||
ir = tail_call_detect(ir);
|
ir = tail_call_detect(ir);
|
||||||
}
|
}
|
||||||
@@ -378,7 +404,97 @@ fn dce(ops: Vec<IrOp>) -> Vec<IrOp> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Pass 5: Tail call detection
|
// Pass 6: Inlining
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Inline small word bodies: replaces `Call(id)` with the word's IR body
|
||||||
|
/// if the body is small enough and not recursive.
|
||||||
|
fn inline(ops: Vec<IrOp>, bodies: &HashMap<WordId, Vec<IrOp>>, max_size: usize) -> Vec<IrOp> {
|
||||||
|
let mut out = Vec::new();
|
||||||
|
for op in ops {
|
||||||
|
match &op {
|
||||||
|
IrOp::Call(id) => {
|
||||||
|
if let Some(body) = bodies.get(id) {
|
||||||
|
if body.len() <= max_size && !contains_call_to(body, *id) {
|
||||||
|
// Inline the body, converting TailCall back to Call
|
||||||
|
// (tail position in the callee is not tail position in the caller)
|
||||||
|
for inlined_op in body {
|
||||||
|
match inlined_op {
|
||||||
|
IrOp::TailCall(tid) => out.push(IrOp::Call(*tid)),
|
||||||
|
other => out.push(other.clone()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out.push(op);
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
out.push(apply_to_bodies(op, &|inner| inline(inner, bodies, max_size)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if an IR body contains a direct call to the given word (recursion guard).
|
||||||
|
fn contains_call_to(ops: &[IrOp], target: WordId) -> bool {
|
||||||
|
for op in ops {
|
||||||
|
match op {
|
||||||
|
IrOp::Call(id) | IrOp::TailCall(id) if *id == target => return true,
|
||||||
|
IrOp::If {
|
||||||
|
then_body,
|
||||||
|
else_body,
|
||||||
|
} => {
|
||||||
|
if contains_call_to(then_body, target) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if let Some(eb) = else_body {
|
||||||
|
if contains_call_to(eb, target) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
IrOp::DoLoop { body, .. }
|
||||||
|
| IrOp::BeginUntil { body }
|
||||||
|
| IrOp::BeginAgain { body } => {
|
||||||
|
if contains_call_to(body, target) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
IrOp::BeginWhileRepeat { test, body } => {
|
||||||
|
if contains_call_to(test, target) || contains_call_to(body, target) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
IrOp::BeginDoubleWhileRepeat {
|
||||||
|
outer_test,
|
||||||
|
inner_test,
|
||||||
|
body,
|
||||||
|
after_repeat,
|
||||||
|
else_body,
|
||||||
|
} => {
|
||||||
|
if contains_call_to(outer_test, target)
|
||||||
|
|| contains_call_to(inner_test, target)
|
||||||
|
|| contains_call_to(body, target)
|
||||||
|
|| contains_call_to(after_repeat, target)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if let Some(eb) = else_body {
|
||||||
|
if contains_call_to(eb, target) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Pass 7: Tail call detection
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
/// Tail call detection: replace the last `Call` with `TailCall` when safe.
|
/// Tail call detection: replace the last `Call` with `TailCall` when safe.
|
||||||
@@ -446,8 +562,24 @@ mod tests {
|
|||||||
tail_call: true,
|
tail_call: true,
|
||||||
strength_reduce: true,
|
strength_reduce: true,
|
||||||
dce: true,
|
dce: true,
|
||||||
|
inline: false,
|
||||||
};
|
};
|
||||||
optimize(ops, &config)
|
optimize(ops, &config, &HashMap::new())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn opt_with_inline(
|
||||||
|
ops: Vec<IrOp>,
|
||||||
|
bodies: &HashMap<WordId, Vec<IrOp>>,
|
||||||
|
) -> Vec<IrOp> {
|
||||||
|
let config = OptConfig {
|
||||||
|
peephole: true,
|
||||||
|
constant_fold: true,
|
||||||
|
tail_call: true,
|
||||||
|
strength_reduce: true,
|
||||||
|
dce: true,
|
||||||
|
inline: true,
|
||||||
|
};
|
||||||
|
optimize(ops, &config, bodies)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Peephole tests
|
// Peephole tests
|
||||||
@@ -615,4 +747,57 @@ mod tests {
|
|||||||
}]
|
}]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Inlining tests
|
||||||
|
#[test]
|
||||||
|
fn inline_simple() {
|
||||||
|
let mut bodies = HashMap::new();
|
||||||
|
// SQUARE = DUP *
|
||||||
|
bodies.insert(WordId(5), vec![IrOp::Dup, IrOp::Mul]);
|
||||||
|
let result = opt_with_inline(
|
||||||
|
vec![IrOp::PushI32(7), IrOp::Call(WordId(5))],
|
||||||
|
&bodies,
|
||||||
|
);
|
||||||
|
// After inlining: 7 DUP * (Dup isn't folded by constant folder)
|
||||||
|
assert_eq!(result, vec![IrOp::PushI32(7), IrOp::Dup, IrOp::Mul]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn inline_folds_constants() {
|
||||||
|
let mut bodies = HashMap::new();
|
||||||
|
// ADD3 = 3 +
|
||||||
|
bodies.insert(WordId(5), vec![IrOp::PushI32(3), IrOp::Add]);
|
||||||
|
let result = opt_with_inline(
|
||||||
|
vec![IrOp::PushI32(5), IrOp::Call(WordId(5))],
|
||||||
|
&bodies,
|
||||||
|
);
|
||||||
|
// After inlining: PushI32(5) PushI32(3) Add => folded to PushI32(8)
|
||||||
|
assert_eq!(result, vec![IrOp::PushI32(8)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn no_inline_recursive() {
|
||||||
|
let mut bodies = HashMap::new();
|
||||||
|
bodies.insert(WordId(5), vec![IrOp::Dup, IrOp::Call(WordId(5))]);
|
||||||
|
let result = opt_with_inline(vec![IrOp::Call(WordId(5))], &bodies);
|
||||||
|
// Should NOT inline (recursive), but tail call detect may convert
|
||||||
|
assert!(matches!(result.last(), Some(IrOp::Call(WordId(5))) | Some(IrOp::TailCall(WordId(5)))));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn no_inline_large() {
|
||||||
|
let mut bodies = HashMap::new();
|
||||||
|
// Body with 9 ops (> max_size of 8)
|
||||||
|
bodies.insert(WordId(5), vec![IrOp::Dup; 9]);
|
||||||
|
let config = OptConfig {
|
||||||
|
peephole: false,
|
||||||
|
constant_fold: false,
|
||||||
|
tail_call: false,
|
||||||
|
strength_reduce: false,
|
||||||
|
dce: false,
|
||||||
|
inline: true,
|
||||||
|
};
|
||||||
|
let result = optimize(vec![IrOp::Call(WordId(5))], &config, &bodies);
|
||||||
|
assert_eq!(result, vec![IrOp::Call(WordId(5))]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -230,6 +230,8 @@ pub struct ForthVM {
|
|||||||
fvalue_words: std::collections::HashSet<u32>,
|
fvalue_words: std::collections::HashSet<u32>,
|
||||||
// Float I/O precision (default 6)
|
// Float I/O precision (default 6)
|
||||||
float_precision: Arc<Mutex<usize>>,
|
float_precision: Arc<Mutex<usize>>,
|
||||||
|
/// Stored IR bodies for inlining optimization.
|
||||||
|
ir_bodies: HashMap<WordId, Vec<IrOp>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ForthVM {
|
impl ForthVM {
|
||||||
@@ -345,6 +347,7 @@ impl ForthVM {
|
|||||||
two_value_words: std::collections::HashSet::new(),
|
two_value_words: std::collections::HashSet::new(),
|
||||||
fvalue_words: std::collections::HashSet::new(),
|
fvalue_words: std::collections::HashSet::new(),
|
||||||
float_precision: Arc::new(Mutex::new(6)),
|
float_precision: Arc::new(Mutex::new(6)),
|
||||||
|
ir_bodies: HashMap::new(),
|
||||||
};
|
};
|
||||||
|
|
||||||
vm.register_primitives()?;
|
vm.register_primitives()?;
|
||||||
@@ -1427,15 +1430,16 @@ impl ForthVM {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Run all enabled optimization passes on an IR sequence.
|
/// Run all enabled optimization passes on an IR sequence.
|
||||||
fn optimize_ir(ir: Vec<IrOp>) -> Vec<IrOp> {
|
fn optimize_ir(ir: Vec<IrOp>, bodies: &HashMap<WordId, Vec<IrOp>>) -> Vec<IrOp> {
|
||||||
let config = OptConfig {
|
let config = OptConfig {
|
||||||
peephole: true,
|
peephole: true,
|
||||||
constant_fold: true,
|
constant_fold: true,
|
||||||
tail_call: true,
|
tail_call: true,
|
||||||
strength_reduce: true,
|
strength_reduce: true,
|
||||||
dce: true,
|
dce: true,
|
||||||
|
inline: true,
|
||||||
};
|
};
|
||||||
optimize(ir, &config)
|
optimize(ir, &config, bodies)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn finish_colon_def(&mut self) -> anyhow::Result<()> {
|
fn finish_colon_def(&mut self) -> anyhow::Result<()> {
|
||||||
@@ -1455,7 +1459,9 @@ impl ForthVM {
|
|||||||
.take()
|
.take()
|
||||||
.ok_or_else(|| anyhow::anyhow!("no word being compiled"))?;
|
.ok_or_else(|| anyhow::anyhow!("no word being compiled"))?;
|
||||||
let ir = std::mem::take(&mut self.compiling_ir);
|
let ir = std::mem::take(&mut self.compiling_ir);
|
||||||
let ir = Self::optimize_ir(ir);
|
let bodies = self.ir_bodies.clone();
|
||||||
|
let ir = Self::optimize_ir(ir, &bodies);
|
||||||
|
self.ir_bodies.insert(word_id, ir.clone());
|
||||||
|
|
||||||
// Compile to WASM
|
// Compile to WASM
|
||||||
let config = CodegenConfig {
|
let config = CodegenConfig {
|
||||||
@@ -1771,11 +1777,13 @@ impl ForthVM {
|
|||||||
immediate: bool,
|
immediate: bool,
|
||||||
ir_body: Vec<IrOp>,
|
ir_body: Vec<IrOp>,
|
||||||
) -> anyhow::Result<WordId> {
|
) -> anyhow::Result<WordId> {
|
||||||
let ir_body = Self::optimize_ir(ir_body);
|
let bodies = self.ir_bodies.clone();
|
||||||
|
let ir_body = Self::optimize_ir(ir_body, &bodies);
|
||||||
let word_id = self
|
let word_id = self
|
||||||
.dictionary
|
.dictionary
|
||||||
.create(name, immediate)
|
.create(name, immediate)
|
||||||
.map_err(|e| anyhow::anyhow!("{e}"))?;
|
.map_err(|e| anyhow::anyhow!("{e}"))?;
|
||||||
|
self.ir_bodies.insert(word_id, ir_body.clone());
|
||||||
|
|
||||||
let config = CodegenConfig {
|
let config = CodegenConfig {
|
||||||
base_fn_index: word_id.0,
|
base_fn_index: word_id.0,
|
||||||
|
|||||||
Reference in New Issue
Block a user