src/diffusers/hooks/pyramid_attention_broadcast.py (9 changes: 6 additions & 3 deletions)
@@ -159,25 +159,28 @@ def new_forward(self, module: torch.nn.Module, *args, **kwargs) -> Any:
         )
         should_compute_attention = (
             self.state.cache is None
             or self.state.iteration == 0
             or not is_within_timestep_range
             or self.state.iteration % self.block_skip_range == 0
         )
 
         if should_compute_attention:
             output = self.fn_ref.original_forward(*args, **kwargs)
+            # When outside the active timestep window, release the cached tensor
+            # immediately so GPU memory is not held until the next reset_state().
+            if not is_within_timestep_range:
+                self.state.cache = None
+            else:
+                self.state.cache = output
         else:
             output = self.state.cache
 
-        self.state.cache = output
         self.state.iteration += 1
         return output
 
     def reset_state(self, module: torch.nn.Module) -> None:
         self.state.reset()
         return module
 
 
 def apply_pyramid_attention_broadcast(module: torch.nn.Module, config: PyramidAttentionBroadcastConfig):
     r"""
     Apply [Pyramid Attention Broadcast](https://huggingface.co/papers/2408.12588) to a given pipeline.
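For context, here is a minimal usage sketch of the hook this hunk modifies. It follows the pattern shown in the diffusers documentation; the checkpoint and config values are illustrative assumptions, not part of this PR.

```python
import torch

from diffusers import CogVideoXPipeline
from diffusers.hooks import PyramidAttentionBroadcastConfig, apply_pyramid_attention_broadcast

# Illustrative checkpoint and settings; only the API shape matters here.
pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16)
pipe.to("cuda")

config = PyramidAttentionBroadcastConfig(
    spatial_attention_block_skip_range=2,               # recompute attention every 2nd step
    spatial_attention_timestep_skip_range=(100, 800),   # reuse the cache only inside this window
    current_timestep_callback=lambda: pipe.current_timestep,
)
apply_pyramid_attention_broadcast(pipe.transformer, config)
```

Outside the `(100, 800)` window the hook always recomputes attention, and with this change it also drops its cached tensor immediately instead of holding it until `reset_state()` runs.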