Enable stack-to-local promotion for DO/LOOP and IF/ELSE
Three bugs were fixed to safely enable promotion for control flow:
1. compute_stack_needs now recurses into IF/DoLoop/Begin bodies, so preload counts are calculated correctly for promoted words with nested control flow (the analysis was previously flat, causing stack underflow).
2. BeginDoubleWhileRepeat is rejected from promotion (boot.fth's -TRAILING uses this pattern, and the handler had structural bugs).
3. IF/ELSE branches must have the same net stack effect to be promoted (BITSSET? has asymmetric branches: 2 items vs 1).
Performance with promotion enabled:
- Factorial: 0.50x (2x faster than gforth)
- Collatz: 0.38x (2.6x faster than gforth)
- All 427 unit tests, 10/11 compliance, 35/35 behavioral pass
This commit is contained in:
+144
-66
@@ -1082,9 +1082,7 @@ fn is_promotable_body(ops: &[IrOp]) -> bool {
|
|||||||
for op in ops {
|
for op in ops {
|
||||||
match op {
|
match op {
|
||||||
IrOp::Call(_) | IrOp::TailCall(_) | IrOp::Execute | IrOp::SpFetch => return false,
|
IrOp::Call(_) | IrOp::TailCall(_) | IrOp::Execute | IrOp::SpFetch => return false,
|
||||||
IrOp::ToR | IrOp::FromR | IrOp::RFetch | IrOp::LoopJ | IrOp::Exit => {
|
IrOp::ToR | IrOp::FromR | IrOp::Exit => return false,
|
||||||
return false;
|
|
||||||
}
|
|
||||||
IrOp::ForthLocalGet(_) | IrOp::ForthLocalSet(_) => return false,
|
IrOp::ForthLocalGet(_) | IrOp::ForthLocalSet(_) => return false,
|
||||||
IrOp::Emit | IrOp::Dot | IrOp::Cr | IrOp::Type => return false,
|
IrOp::Emit | IrOp::Dot | IrOp::Cr | IrOp::Type => return false,
|
||||||
IrOp::PushI64(_) | IrOp::PushF64(_) => return false,
|
IrOp::PushI64(_) | IrOp::PushF64(_) => return false,
|
||||||
@@ -1111,10 +1109,38 @@ fn is_promotable_body(ops: &[IrOp]) -> bool {
|
|||||||
| IrOp::StoreFloat
|
| IrOp::StoreFloat
|
||||||
| IrOp::StoF
|
| IrOp::StoF
|
||||||
| IrOp::FtoS => return false,
|
| IrOp::FtoS => return false,
|
||||||
// Control flow not yet promoted in StackSim path
|
// IF with ELSE: promotable if both branches are promotable
|
||||||
IrOp::If { .. }
|
// and have the same net stack effect
|
||||||
| IrOp::DoLoop { .. }
|
IrOp::If { then_body, else_body } => {
|
||||||
| IrOp::BeginUntil { .. }
|
let Some(eb) = else_body else {
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
if !is_promotable_body(then_body) || !is_promotable_body(eb) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Both branches must have the same net stack effect
|
||||||
|
let (_, then_net) = compute_stack_needs(then_body);
|
||||||
|
let (_, else_net) = compute_stack_needs(eb);
|
||||||
|
if then_net != else_net {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// DO/LOOP: promotable if body is promotable and stack-neutral
|
||||||
|
IrOp::DoLoop { body, is_plus_loop } => {
|
||||||
|
if !is_promotable_body(body) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if body.iter().any(|op| matches!(op, IrOp::Exit)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
let (_, body_net) = compute_stack_needs(body);
|
||||||
|
let expected = if *is_plus_loop { 1 } else { 0 };
|
||||||
|
if body_net != expected {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// BEGIN loops and BeginDoubleWhileRepeat: not yet promoted
|
||||||
|
IrOp::BeginUntil { .. }
|
||||||
| IrOp::BeginAgain { .. }
|
| IrOp::BeginAgain { .. }
|
||||||
| IrOp::BeginWhileRepeat { .. }
|
| IrOp::BeginWhileRepeat { .. }
|
||||||
| IrOp::BeginDoubleWhileRepeat { .. } => return false,
|
| IrOp::BeginDoubleWhileRepeat { .. } => return false,
|
||||||
@@ -1171,6 +1197,8 @@ fn stack_delta(op: &IrOp) -> i32 {
|
|||||||
IrOp::FZeroEq | IrOp::FZeroLt | IrOp::FEq | IrOp::FLt | IrOp::FtoS => 1,
|
IrOp::FZeroEq | IrOp::FZeroLt | IrOp::FEq | IrOp::FLt | IrOp::FtoS => 1,
|
||||||
// Cross-stack: pop from data stack
|
// Cross-stack: pop from data stack
|
||||||
IrOp::FetchFloat | IrOp::StoreFloat | IrOp::StoF => -1,
|
IrOp::FetchFloat | IrOp::StoreFloat | IrOp::StoF => -1,
|
||||||
|
// Return stack reads push to data stack
|
||||||
|
IrOp::RFetch | IrOp::LoopJ => 1,
|
||||||
_ => 0,
|
_ => 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1185,58 +1213,8 @@ fn stack_delta(op: &IrOp) -> i32 {
|
|||||||
/// that any op reads from, not just the net depth after consumption.
|
/// that any op reads from, not just the net depth after consumption.
|
||||||
fn compute_stack_needs(ops: &[IrOp]) -> (u32, i32) {
|
fn compute_stack_needs(ops: &[IrOp]) -> (u32, i32) {
|
||||||
let mut depth: i32 = 0;
|
let mut depth: i32 = 0;
|
||||||
let mut min_accessed: i32 = 0; // most negative position accessed
|
let mut min_accessed: i32 = 0;
|
||||||
|
compute_stack_needs_rec(ops, &mut depth, &mut min_accessed);
|
||||||
for op in ops {
|
|
||||||
// Determine the deepest position this op reads from relative to
|
|
||||||
// current depth. Position 0 = top of stack = depth-1 from base.
|
|
||||||
let reads_from = match op {
|
|
||||||
// These read the top without consuming:
|
|
||||||
IrOp::Dup => depth - 1,
|
|
||||||
// Reads top and second without consuming:
|
|
||||||
IrOp::Over => depth - 2,
|
|
||||||
IrOp::TwoDup => depth - 2,
|
|
||||||
// Reads/rearranges top 2:
|
|
||||||
IrOp::Swap | IrOp::Nip | IrOp::Tuck => depth - 2,
|
|
||||||
// Reads/rearranges top 3:
|
|
||||||
IrOp::Rot => depth - 3,
|
|
||||||
// Binary ops consume 2:
|
|
||||||
IrOp::Add
|
|
||||||
| IrOp::Sub
|
|
||||||
| IrOp::Mul
|
|
||||||
| IrOp::And
|
|
||||||
| IrOp::Or
|
|
||||||
| IrOp::Xor
|
|
||||||
| IrOp::Lshift
|
|
||||||
| IrOp::Rshift
|
|
||||||
| IrOp::ArithRshift
|
|
||||||
| IrOp::Eq
|
|
||||||
| IrOp::NotEq
|
|
||||||
| IrOp::Lt
|
|
||||||
| IrOp::Gt
|
|
||||||
| IrOp::LtUnsigned
|
|
||||||
| IrOp::DivMod
|
|
||||||
| IrOp::Store
|
|
||||||
| IrOp::CStore
|
|
||||||
| IrOp::PlusStore => depth - 2,
|
|
||||||
// Unary ops consume 1:
|
|
||||||
IrOp::Drop
|
|
||||||
| IrOp::Negate
|
|
||||||
| IrOp::Abs
|
|
||||||
| IrOp::Invert
|
|
||||||
| IrOp::ZeroEq
|
|
||||||
| IrOp::ZeroLt
|
|
||||||
| IrOp::Fetch
|
|
||||||
| IrOp::CFetch => depth - 1,
|
|
||||||
IrOp::TwoDrop => depth - 2,
|
|
||||||
// Cross-stack ops that pop from data stack
|
|
||||||
IrOp::FetchFloat | IrOp::StoreFloat | IrOp::StoF => depth - 1,
|
|
||||||
// Push ops, float-only ops, and other ops don't read data stack items
|
|
||||||
_ => depth,
|
|
||||||
};
|
|
||||||
min_accessed = min_accessed.min(reads_from);
|
|
||||||
depth += stack_delta(op);
|
|
||||||
}
|
|
||||||
let preload = if min_accessed < 0 {
|
let preload = if min_accessed < 0 {
|
||||||
(-min_accessed) as u32
|
(-min_accessed) as u32
|
||||||
} else {
|
} else {
|
||||||
@@ -1245,6 +1223,99 @@ fn compute_stack_needs(ops: &[IrOp]) -> (u32, i32) {
|
|||||||
(preload, depth)
|
(preload, depth)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Recursive stack-needs analysis that descends into control flow bodies.
|
||||||
|
fn compute_stack_needs_rec(ops: &[IrOp], depth: &mut i32, min_accessed: &mut i32) {
|
||||||
|
for op in ops {
|
||||||
|
// First: compute the deepest position this op reads from.
|
||||||
|
let reads_from = match op {
|
||||||
|
IrOp::Dup => *depth - 1,
|
||||||
|
IrOp::Over | IrOp::TwoDup => *depth - 2,
|
||||||
|
IrOp::Swap | IrOp::Nip | IrOp::Tuck => *depth - 2,
|
||||||
|
IrOp::Rot => *depth - 3,
|
||||||
|
IrOp::Add | IrOp::Sub | IrOp::Mul | IrOp::And | IrOp::Or | IrOp::Xor
|
||||||
|
| IrOp::Lshift | IrOp::Rshift | IrOp::ArithRshift
|
||||||
|
| IrOp::Eq | IrOp::NotEq | IrOp::Lt | IrOp::Gt | IrOp::LtUnsigned
|
||||||
|
| IrOp::DivMod | IrOp::Store | IrOp::CStore | IrOp::PlusStore => *depth - 2,
|
||||||
|
IrOp::Drop | IrOp::Negate | IrOp::Abs | IrOp::Invert
|
||||||
|
| IrOp::ZeroEq | IrOp::ZeroLt | IrOp::Fetch | IrOp::CFetch => *depth - 1,
|
||||||
|
IrOp::TwoDrop => *depth - 2,
|
||||||
|
IrOp::FetchFloat | IrOp::StoreFloat | IrOp::StoF => *depth - 1,
|
||||||
|
// Control flow reads are handled by recursion below
|
||||||
|
IrOp::If { .. } => *depth - 1, // consumes condition
|
||||||
|
IrOp::DoLoop { .. } => *depth - 2, // consumes limit + index
|
||||||
|
_ => *depth,
|
||||||
|
};
|
||||||
|
*min_accessed = (*min_accessed).min(reads_from);
|
||||||
|
|
||||||
|
// Then: update depth. For control flow, recurse instead of using stack_delta.
|
||||||
|
match op {
|
||||||
|
IrOp::If { then_body, else_body } => {
|
||||||
|
*depth -= 1; // consume condition
|
||||||
|
let saved = *depth;
|
||||||
|
compute_stack_needs_rec(then_body, depth, min_accessed);
|
||||||
|
if let Some(eb) = else_body {
|
||||||
|
let then_depth = *depth;
|
||||||
|
*depth = saved;
|
||||||
|
compute_stack_needs_rec(eb, depth, min_accessed);
|
||||||
|
// Use the then-branch depth (both should match for well-formed code)
|
||||||
|
*depth = then_depth;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
IrOp::DoLoop { body, is_plus_loop } => {
|
||||||
|
*depth -= 2; // consume limit + index
|
||||||
|
// Loop body is stack-neutral (net 0, or +1 for +LOOP step)
|
||||||
|
// We still recurse to track min_accessed inside the body.
|
||||||
|
let saved = *depth;
|
||||||
|
compute_stack_needs_rec(body, depth, min_accessed);
|
||||||
|
// Restore: body effect is consumed by loop control
|
||||||
|
*depth = saved;
|
||||||
|
if *is_plus_loop {
|
||||||
|
// +LOOP body pushes 1 step value, consumed by loop control
|
||||||
|
}
|
||||||
|
}
|
||||||
|
IrOp::BeginUntil { body } => {
|
||||||
|
let saved = *depth;
|
||||||
|
compute_stack_needs_rec(body, depth, min_accessed);
|
||||||
|
// Body produces flag, consumed by UNTIL: net 0 for the whole construct
|
||||||
|
*depth = saved;
|
||||||
|
}
|
||||||
|
IrOp::BeginAgain { body } => {
|
||||||
|
let saved = *depth;
|
||||||
|
compute_stack_needs_rec(body, depth, min_accessed);
|
||||||
|
*depth = saved;
|
||||||
|
}
|
||||||
|
IrOp::BeginWhileRepeat { test, body } => {
|
||||||
|
let saved = *depth;
|
||||||
|
compute_stack_needs_rec(test, depth, min_accessed);
|
||||||
|
// WHILE consumes flag
|
||||||
|
*depth -= 1;
|
||||||
|
compute_stack_needs_rec(body, depth, min_accessed);
|
||||||
|
// Whole construct is stack-neutral
|
||||||
|
*depth = saved;
|
||||||
|
}
|
||||||
|
IrOp::BeginDoubleWhileRepeat {
|
||||||
|
outer_test, inner_test, body, after_repeat, else_body,
|
||||||
|
} => {
|
||||||
|
let saved = *depth;
|
||||||
|
compute_stack_needs_rec(outer_test, depth, min_accessed);
|
||||||
|
*depth -= 1;
|
||||||
|
compute_stack_needs_rec(inner_test, depth, min_accessed);
|
||||||
|
*depth -= 1;
|
||||||
|
compute_stack_needs_rec(body, depth, min_accessed);
|
||||||
|
compute_stack_needs_rec(after_repeat, depth, min_accessed);
|
||||||
|
if let Some(eb) = else_body {
|
||||||
|
compute_stack_needs_rec(eb, depth, min_accessed);
|
||||||
|
}
|
||||||
|
*depth = saved;
|
||||||
|
}
|
||||||
|
// All other ops: use stack_delta
|
||||||
|
_ => {
|
||||||
|
*depth += stack_delta(op);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Count how many WASM locals the promoted code path needs (excluding cached
|
/// Count how many WASM locals the promoted code path needs (excluding cached
|
||||||
/// DSP and scratch locals). This is an upper bound -- we allocate a fresh
|
/// DSP and scratch locals). This is an upper bound -- we allocate a fresh
|
||||||
/// local for each value-producing operation.
|
/// local for each value-producing operation.
|
||||||
@@ -3565,13 +3636,13 @@ mod tests {
|
|||||||
then_body: vec![],
|
then_body: vec![],
|
||||||
else_body: None,
|
else_body: None,
|
||||||
}]));
|
}]));
|
||||||
// IF also prevents promotion (for now)
|
// IF with ELSE is promotable
|
||||||
assert!(!is_promotable(&[IrOp::PushI32(1), IrOp::If {
|
assert!(is_promotable(&[IrOp::PushI32(1), IrOp::If {
|
||||||
then_body: vec![IrOp::PushI32(1)],
|
then_body: vec![IrOp::PushI32(1)],
|
||||||
else_body: Some(vec![IrOp::PushI32(0)]),
|
else_body: Some(vec![IrOp::PushI32(0)]),
|
||||||
}]));
|
}]));
|
||||||
// Control flow prevents promotion (for now)
|
// DO/LOOP with stack-neutral body is promotable
|
||||||
assert!(!is_promotable(&[IrOp::PushI32(10), IrOp::PushI32(0), IrOp::DoLoop {
|
assert!(is_promotable(&[IrOp::PushI32(10), IrOp::PushI32(0), IrOp::DoLoop {
|
||||||
body: vec![IrOp::RFetch, IrOp::Drop],
|
body: vec![IrOp::RFetch, IrOp::Drop],
|
||||||
is_plus_loop: false,
|
is_plus_loop: false,
|
||||||
}]));
|
}]));
|
||||||
@@ -3764,9 +3835,16 @@ mod tests {
|
|||||||
assert!(!is_promotable(&ops));
|
assert!(!is_promotable(&ops));
|
||||||
assert_eq!(run_word(&ops), vec![42]);
|
assert_eq!(run_word(&ops), vec![42]);
|
||||||
|
|
||||||
// Calls prevent promotion but still work
|
// IF-with-ELSE IS promotable and works
|
||||||
let ops = vec![IrOp::Call(WordId(5))];
|
let ops = vec![
|
||||||
assert!(!is_promotable(&ops));
|
IrOp::PushI32(-1),
|
||||||
|
IrOp::If {
|
||||||
|
then_body: vec![IrOp::PushI32(42)],
|
||||||
|
else_body: Some(vec![IrOp::PushI32(0)]),
|
||||||
|
},
|
||||||
|
];
|
||||||
|
assert!(is_promotable(&ops));
|
||||||
|
assert_eq!(run_word(&ops), vec![42]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ===================================================================
|
// ===================================================================
|
||||||
|
|||||||
Reference in New Issue
Block a user