From 2b94912a4d8d8576116046955167a1ca32c13fa1 Mon Sep 17 00:00:00 2001
From: Kaylee Lubick <kjlubick@google.com>
Date: Mon, 27 Apr 2026 15:55:24 +0000
Subject: [PATCH] Avoid removing too many stack entries in SkRP during discard_stack

Consider an sksl snippet like:
```
half4 main(float2 xy) {
    float4 v = float4(xy.x, -1.0, -2.0, -3.0);
    v = abs(v);
    return half4(v);
}
```

this could be turned into (unoptimized) instructions like [1]
```
store_src_rg                   xy = src.rg
init_lane_masks                CondMask = LoopMask = RetMask = true
copy_slot_unmasked             v(0) = xy(0)
copy_constant                  v(1) = 0xBF800000 (-1.0)
copy_constant                  v(2) = 0xC0000000 (-2.0)
copy_constant                  v(3) = 0xC0400000 (-3.0)
copy_4_slots_unmasked          $0..3 = v            # unnecessary
bitwise_and_imm_4_ints         $0..3 &= 0x7FFFFFFF
copy_4_slots_unmasked          v = $0..3
load_src                       src.rgba = $0..3
```
( the bitwise_and_imm_4_ints instruction is the abs() part, masking
off the sign bit)

We can optimize cases where we push to the stack (e.g.
the copy_4_slots_unmasked), do an operation (the &=) and then
pop the value off the stack into just doing the operation w/o
involving the stack. (We might have to push the result to the
stack for further use).
```
...
copy_constant                  v(3) = 0xC0400000 (-3.0)
bitwise_and_imm_4_ints         v &= 0x7FFFFFFF
copy_4_slots_unmasked          $0..3 = v
load_src                       src.rgba = $0..3
```

There was a bug in the optimization where we removed *all* the
slots for an instruction (4 in this case because it's a float4)
even though the call was discard_stack(1). This would be followed
up by a call to discard_stack(3) (the remainder of the previous
instruction's stack) and we'd underflow the stack.

Problematic sksl:
```
half4 main(float2 xy) {
  float4 v;
  v.x += xy.x;
  (v = abs(v)).xyz;  # drop 1 channel (the .w)
  return half4(v);
}
// This was in the output
bitwise_and_imm_4_ints         v &= 0x7FFFFFFF
copy_4_slots_unmasked          ExternalPtr(0..3) = v
load_src                       src.rgba = ExternalPtr(0..3)
```

where ExternalPtr is referring to memory outside any variables,
uniforms, or the temporary stack. Yikes! I'll keep that in mind
for the future.

Anyway, for the fix, we'll only remove N slots if the previous
call was N slots wide.

[1] e.g. bazel build //tools/skslc && bazel-bin/tools/skslc/skslc input.sksl output.skrp

Bug: https://issues.chromium.org/issues/500080194
Change-Id: Ib43c62dbec9a2c1d03c2d4960a2925b131504550
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/1218502
Commit-Queue: Kaylee Lubick <kjlubick@google.com>
Reviewed-by: Florin Malita <fmalita@google.com>
---
 .../codegen/SkSLRasterPipelineBuilder.cpp     |  4 +-
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/sksl/codegen/SkSLRasterPipelineBuilder.cpp b/src/sksl/codegen/SkSLRasterPipelineBuilder.cpp
index 9df02016bf..cac2b68d9a 100644
--- a/src/sksl/codegen/SkSLRasterPipelineBuilder.cpp
+++ b/src/sksl/codegen/SkSLRasterPipelineBuilder.cpp
@@ -387,7 +387,7 @@ void Builder::discard_stack(int32_t count, int stackID) {
             case BuilderOp::copy_stack_to_slots_unmasked: {
                 // Look for a pattern of `push, immediate-ops, pop` and simplify it down to an
                 // immediate-op directly to the value slot.
-                if (count == 1) {
+                if (count == lastInstruction->fImmA) {
                     if (this->simplifyImmediateUnmaskedOp()) {
                         return;
                     }
@@ -1374,7 +1374,7 @@ Program::StackDepths Program::tempStackMaxDepths() const {
         current[stackID] += stack_usage(inst);
         largest[stackID] = std::max(current[stackID], largest[stackID]);
         // If we assert here, the generated program has popped off the top of the stack.
-        SkASSERTF(current[stackID] >= 0, "unbalanced temp stack push/pop on stack %d", stackID);
+        SkASSERTF_RELEASE(current[stackID] >= 0, "unbalanced temp stack push/pop on stack");
     }
 
     // Ensure that when the program is complete, our stacks are fully balanced.
-- 
2.43.0

