mirror of
https://github.com/KoboldAI/KoboldAI-Client.git
synced 2025-01-20 20:38:21 +01:00
Slightly increased performance in breakmodel mode
Commit a283d34b27 made breakmodel mode slower. Performance has been restored to how it was before that commit.
This commit is contained in:
parent
231621e7c2
commit
f9e6a6da17
@@ -434,7 +434,7 @@ def new_forward(
|
||||
device = primary_device if i < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, i - ram_blocks)
|
||||
outputs = block(
|
||||
hidden_states.to(device) if breakmodel and hidden_states is not None else hidden_states,
|
||||
-layer_past=tuple(v.to(device) for v in layer_past if v is not None) if breakmodel and layer_past is not None else layer_past,
|
||||
+layer_past=tuple(v.to(device) for v in layer_past if v is not None) if breakmodel and layer_past is not None and i >= ram_blocks and len(layer_past) and layer_past[0].device.index != device else layer_past,
|
||||
attention_mask=attn_mask.to(device) if breakmodel and attn_mask is not None else attn_mask,
|
||||
head_mask=head_mask[i].to(device) if breakmodel and head_mask[i] is not None else head_mask[i],
|
||||
use_cache=use_cache,
|
||||
|
Loading…
Reference in New Issue
Block a user