From 4cc71666d5d36ae3925e3020f29d045be0e4f218 Mon Sep 17 00:00:00 2001 From: Oleksandr Kozachuk Date: Thu, 9 Apr 2026 19:26:00 +0200 Subject: [PATCH] Enable stack-to-local promotion for DO/LOOP and IF/ELSE Three bugs fixed to safely enable promotion for control flow: 1. compute_stack_needs now recurses into IF/DoLoop/Begin bodies, correctly calculating preload counts for promoted words with nested control flow (was flat, causing stack underflow). 2. BeginDoubleWhileRepeat rejected from promotion (boot.fth's -TRAILING uses this pattern, handler had structural bugs). 3. IF/ELSE branches must have same net stack effect for promotion (BITSSET? has asymmetric branches: 2 items vs 1). Performance with promotion enabled: - Factorial: 0.50x (2x faster than gforth) - Collatz: 0.38x (2.6x faster than gforth) - All 427 unit tests, 10/11 compliance, 35/35 behavioral pass --- crates/core/src/codegen.rs | 210 +++++++++++++++++++++++++------------ 1 file changed, 144 insertions(+), 66 deletions(-) diff --git a/crates/core/src/codegen.rs b/crates/core/src/codegen.rs index 5ebedf8..6a9f45f 100644 --- a/crates/core/src/codegen.rs +++ b/crates/core/src/codegen.rs @@ -1082,9 +1082,7 @@ fn is_promotable_body(ops: &[IrOp]) -> bool { for op in ops { match op { IrOp::Call(_) | IrOp::TailCall(_) | IrOp::Execute | IrOp::SpFetch => return false, - IrOp::ToR | IrOp::FromR | IrOp::RFetch | IrOp::LoopJ | IrOp::Exit => { - return false; - } + IrOp::ToR | IrOp::FromR | IrOp::Exit => return false, IrOp::ForthLocalGet(_) | IrOp::ForthLocalSet(_) => return false, IrOp::Emit | IrOp::Dot | IrOp::Cr | IrOp::Type => return false, IrOp::PushI64(_) | IrOp::PushF64(_) => return false, @@ -1111,10 +1109,38 @@ fn is_promotable_body(ops: &[IrOp]) -> bool { | IrOp::StoreFloat | IrOp::StoF | IrOp::FtoS => return false, - // Control flow not yet promoted in StackSim path - IrOp::If { .. } - | IrOp::DoLoop { .. } - | IrOp::BeginUntil { .. } + // IF with ELSE: promotable if both branches are promotable + // and have the same net stack effect + IrOp::If { then_body, else_body } => { + let Some(eb) = else_body else { + return false; + }; + if !is_promotable_body(then_body) || !is_promotable_body(eb) { + return false; + } + // Both branches must have the same net stack effect + let (_, then_net) = compute_stack_needs(then_body); + let (_, else_net) = compute_stack_needs(eb); + if then_net != else_net { + return false; + } + } + // DO/LOOP: promotable if body is promotable and stack-neutral + IrOp::DoLoop { body, is_plus_loop } => { + if !is_promotable_body(body) { + return false; + } + if body.iter().any(|op| matches!(op, IrOp::Exit)) { + return false; + } + let (_, body_net) = compute_stack_needs(body); + let expected = if *is_plus_loop { 1 } else { 0 }; + if body_net != expected { + return false; + } + } + // BEGIN loops and BeginDoubleWhileRepeat: not yet promoted + IrOp::BeginUntil { .. } | IrOp::BeginAgain { .. } | IrOp::BeginWhileRepeat { .. } | IrOp::BeginDoubleWhileRepeat { .. } => return false, @@ -1171,6 +1197,8 @@ fn stack_delta(op: &IrOp) -> i32 { IrOp::FZeroEq | IrOp::FZeroLt | IrOp::FEq | IrOp::FLt | IrOp::FtoS => 1, // Cross-stack: pop from data stack IrOp::FetchFloat | IrOp::StoreFloat | IrOp::StoF => -1, + // Return stack reads push to data stack + IrOp::RFetch | IrOp::LoopJ => 1, _ => 0, } } @@ -1185,58 +1213,8 @@ fn stack_delta(op: &IrOp) -> i32 { /// that any op reads from, not just the net depth after consumption. fn compute_stack_needs(ops: &[IrOp]) -> (u32, i32) { let mut depth: i32 = 0; - let mut min_accessed: i32 = 0; // most negative position accessed - - for op in ops { - // Determine the deepest position this op reads from relative to - // current depth. Position 0 = top of stack = depth-1 from base. - let reads_from = match op { - // These read the top without consuming: - IrOp::Dup => depth - 1, - // Reads top and second without consuming: - IrOp::Over => depth - 2, - IrOp::TwoDup => depth - 2, - // Reads/rearranges top 2: - IrOp::Swap | IrOp::Nip | IrOp::Tuck => depth - 2, - // Reads/rearranges top 3: - IrOp::Rot => depth - 3, - // Binary ops consume 2: - IrOp::Add - | IrOp::Sub - | IrOp::Mul - | IrOp::And - | IrOp::Or - | IrOp::Xor - | IrOp::Lshift - | IrOp::Rshift - | IrOp::ArithRshift - | IrOp::Eq - | IrOp::NotEq - | IrOp::Lt - | IrOp::Gt - | IrOp::LtUnsigned - | IrOp::DivMod - | IrOp::Store - | IrOp::CStore - | IrOp::PlusStore => depth - 2, - // Unary ops consume 1: - IrOp::Drop - | IrOp::Negate - | IrOp::Abs - | IrOp::Invert - | IrOp::ZeroEq - | IrOp::ZeroLt - | IrOp::Fetch - | IrOp::CFetch => depth - 1, - IrOp::TwoDrop => depth - 2, - // Cross-stack ops that pop from data stack - IrOp::FetchFloat | IrOp::StoreFloat | IrOp::StoF => depth - 1, - // Push ops, float-only ops, and other ops don't read data stack items - _ => depth, - }; - min_accessed = min_accessed.min(reads_from); - depth += stack_delta(op); - } + let mut min_accessed: i32 = 0; + compute_stack_needs_rec(ops, &mut depth, &mut min_accessed); let preload = if min_accessed < 0 { (-min_accessed) as u32 } else { @@ -1245,6 +1223,99 @@ fn compute_stack_needs(ops: &[IrOp]) -> (u32, i32) { (preload, depth) } +/// Recursive stack-needs analysis that descends into control flow bodies. +fn compute_stack_needs_rec(ops: &[IrOp], depth: &mut i32, min_accessed: &mut i32) { + for op in ops { + // First: compute the deepest position this op reads from. + let reads_from = match op { + IrOp::Dup => *depth - 1, + IrOp::Over | IrOp::TwoDup => *depth - 2, + IrOp::Swap | IrOp::Nip | IrOp::Tuck => *depth - 2, + IrOp::Rot => *depth - 3, + IrOp::Add | IrOp::Sub | IrOp::Mul | IrOp::And | IrOp::Or | IrOp::Xor + | IrOp::Lshift | IrOp::Rshift | IrOp::ArithRshift + | IrOp::Eq | IrOp::NotEq | IrOp::Lt | IrOp::Gt | IrOp::LtUnsigned + | IrOp::DivMod | IrOp::Store | IrOp::CStore | IrOp::PlusStore => *depth - 2, + IrOp::Drop | IrOp::Negate | IrOp::Abs | IrOp::Invert + | IrOp::ZeroEq | IrOp::ZeroLt | IrOp::Fetch | IrOp::CFetch => *depth - 1, + IrOp::TwoDrop => *depth - 2, + IrOp::FetchFloat | IrOp::StoreFloat | IrOp::StoF => *depth - 1, + // Control flow reads are handled by recursion below + IrOp::If { .. } => *depth - 1, // consumes condition + IrOp::DoLoop { .. } => *depth - 2, // consumes limit + index + _ => *depth, + }; + *min_accessed = (*min_accessed).min(reads_from); + + // Then: update depth. For control flow, recurse instead of using stack_delta. + match op { + IrOp::If { then_body, else_body } => { + *depth -= 1; // consume condition + let saved = *depth; + compute_stack_needs_rec(then_body, depth, min_accessed); + if let Some(eb) = else_body { + let then_depth = *depth; + *depth = saved; + compute_stack_needs_rec(eb, depth, min_accessed); + // Use the then-branch depth (both should match for well-formed code) + *depth = then_depth; + } + } + IrOp::DoLoop { body, is_plus_loop } => { + *depth -= 2; // consume limit + index + // Loop body is stack-neutral (net 0, or +1 for +LOOP step) + // We still recurse to track min_accessed inside the body. + let saved = *depth; + compute_stack_needs_rec(body, depth, min_accessed); + // Restore: body effect is consumed by loop control + *depth = saved; + if *is_plus_loop { + // +LOOP body pushes 1 step value, consumed by loop control + } + } + IrOp::BeginUntil { body } => { + let saved = *depth; + compute_stack_needs_rec(body, depth, min_accessed); + // Body produces flag, consumed by UNTIL: net 0 for the whole construct + *depth = saved; + } + IrOp::BeginAgain { body } => { + let saved = *depth; + compute_stack_needs_rec(body, depth, min_accessed); + *depth = saved; + } + IrOp::BeginWhileRepeat { test, body } => { + let saved = *depth; + compute_stack_needs_rec(test, depth, min_accessed); + // WHILE consumes flag + *depth -= 1; + compute_stack_needs_rec(body, depth, min_accessed); + // Whole construct is stack-neutral + *depth = saved; + } + IrOp::BeginDoubleWhileRepeat { + outer_test, inner_test, body, after_repeat, else_body, + } => { + let saved = *depth; + compute_stack_needs_rec(outer_test, depth, min_accessed); + *depth -= 1; + compute_stack_needs_rec(inner_test, depth, min_accessed); + *depth -= 1; + compute_stack_needs_rec(body, depth, min_accessed); + compute_stack_needs_rec(after_repeat, depth, min_accessed); + if let Some(eb) = else_body { + compute_stack_needs_rec(eb, depth, min_accessed); + } + *depth = saved; + } + // All other ops: use stack_delta + _ => { + *depth += stack_delta(op); + } + } + } +} + /// Count how many WASM locals the promoted code path needs (excluding cached /// DSP and scratch locals). This is an upper bound -- we allocate a fresh /// local for each value-producing operation. @@ -3565,13 +3636,13 @@ mod tests { then_body: vec![], else_body: None, }])); - // IF also prevents promotion (for now) - assert!(!is_promotable(&[IrOp::PushI32(1), IrOp::If { + // IF with ELSE is promotable + assert!(is_promotable(&[IrOp::PushI32(1), IrOp::If { then_body: vec![IrOp::PushI32(1)], else_body: Some(vec![IrOp::PushI32(0)]), }])); - // Control flow prevents promotion (for now) - assert!(!is_promotable(&[IrOp::PushI32(10), IrOp::PushI32(0), IrOp::DoLoop { + // DO/LOOP with stack-neutral body is promotable + assert!(is_promotable(&[IrOp::PushI32(10), IrOp::PushI32(0), IrOp::DoLoop { body: vec![IrOp::RFetch, IrOp::Drop], is_plus_loop: false, }])); @@ -3764,9 +3835,16 @@ mod tests { assert!(!is_promotable(&ops)); assert_eq!(run_word(&ops), vec![42]); - // Calls prevent promotion but still work - let ops = vec![IrOp::Call(WordId(5))]; - assert!(!is_promotable(&ops)); + // IF-with-ELSE IS promotable and works + let ops = vec![ + IrOp::PushI32(-1), + IrOp::If { + then_body: vec![IrOp::PushI32(42)], + else_body: Some(vec![IrOp::PushI32(0)]), + }, + ]; + assert!(is_promotable(&ops)); + assert_eq!(run_word(&ops), vec![42]); } // ===================================================================