Commit 3bb031b

sxuagrima1304 authored and committed
Call .detach() in static attention cache update helper
Differential Revision: D80853817 Pull Request resolved: pytorch#13618
1 parent 9c0280c commit 3bb031b

File tree

1 file changed: +2 −2 lines


examples/models/llama/static_attention.py

Lines changed: 2 additions & 2 deletions

@@ -549,7 +549,7 @@ def _update_states(self, attn_updates, update_pos, update_len):
                 style=self.style,
                 update_pos=update_pos,
                 update_len=update_len,
-            )
+            ).detach()
         for cache_id, update in v_cache_updates.items():
             self.v_caches[cache_id] = StaticKVCache.apply_update(
                 self.v_caches[cache_id],
@@ -558,7 +558,7 @@ def _update_states(self, attn_updates, update_pos, update_len):
                 style=self.style,
                 update_pos=update_pos,
                 update_len=update_len,
-            )
+            ).detach()
         self.pos += update_len

     def _get_lookahead_decoding_mask(
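The change is small but consequential: a tensor produced by an autograd-tracked operation keeps a reference to the graph that created it, so storing an undetached result in a long-lived cache attribute retains that graph across decode steps. The sketch below illustrates the effect with a hypothetical stand-in for the cache update (it is not the ExecuTorch `StaticKVCache.apply_update` helper, just a concatenate-and-trim placeholder):

```python
import torch

cache = torch.zeros(1, 4)
update = torch.ones(1, 4, requires_grad=True)

# Hypothetical stand-in for a cache update: append the new entries,
# then keep only the most recent positions.
new_cache = torch.cat([cache, update], dim=1)[:, -4:]
assert new_cache.grad_fn is not None  # still tied to the autograd graph

# Detaching breaks the link, so the stored cache tensor no longer
# pins the graph (and its intermediates) in memory between steps.
detached = new_cache.detach()
assert detached.grad_fn is None
assert not detached.requires_grad
```

Wrapping the update in `torch.no_grad()` would avoid building the graph in the first place; calling `.detach()` on the result, as this commit does, achieves the same end for the stored tensor with a one-line change at each call site.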

0 commit comments
