Slightly increased performance in breakmodel mode

Commit a283d34b2731abfe7f5f1e939117491f0755cedb made breakmodel mode slower. Performance has been restored to how it was before that commit.
2025-03-23 15:00:05 +01:00 · 2021-10-05 10:25:06 -04:00 · 2021-10-05 10:25:06 -04:00 · f9e6a6da17
commit f9e6a6da17
parent 231621e7c2
1 changed file with 1 addition and 1 deletion
--- a/breakmodel.py
+++ b/breakmodel.py
@@ -434,7 +434,7 @@ def new_forward(
                device = primary_device if i < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, i - ram_blocks)
            outputs = block(
                hidden_states.to(device) if breakmodel and hidden_states is not None else hidden_states,
-                layer_past=tuple(v.to(device) for v in layer_past if v is not None) if breakmodel and layer_past is not None else layer_past,
+                layer_past=tuple(v.to(device) for v in layer_past if v is not None) if breakmodel and layer_past is not None and i >= ram_blocks and len(layer_past) and layer_past[0].device.index != device else layer_past,
                attention_mask=attn_mask.to(device) if breakmodel and attn_mask is not None else attn_mask,
                head_mask=head_mask[i].to(device) if breakmodel and head_mask[i] is not None else head_mask[i],
                use_cache=use_cache,