mirror of
https://github.com/KoboldAI/KoboldAI-Client.git
synced 2025-02-01 10:06:44 +01:00
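Slightly increased performance in breakmodel mode
Commit a283d34b2731abfe7f5f1e939117491f0755cedb made breakmodel mode slower. This commit restores performance to how it was before that commit: `layer_past` is now moved to the target device only for GPU blocks (`i >= ram_blocks`) whose non-empty cache is not already on that device, instead of being moved on every block.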
This commit is contained in:
parent
231621e7c2
commit
f9e6a6da17
@@ -434,7 +434,7 @@ def new_forward(
|
|||||||
device = primary_device if i < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, i - ram_blocks)
|
device = primary_device if i < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, i - ram_blocks)
|
||||||
outputs = block(
|
outputs = block(
|
||||||
hidden_states.to(device) if breakmodel and hidden_states is not None else hidden_states,
|
hidden_states.to(device) if breakmodel and hidden_states is not None else hidden_states,
|
||||||
layer_past=tuple(v.to(device) for v in layer_past if v is not None) if breakmodel and layer_past is not None else layer_past,
|
layer_past=tuple(v.to(device) for v in layer_past if v is not None) if breakmodel and layer_past is not None and i >= ram_blocks and len(layer_past) and layer_past[0].device.index != device else layer_past,
|
||||||
attention_mask=attn_mask.to(device) if breakmodel and attn_mask is not None else attn_mask,
|
attention_mask=attn_mask.to(device) if breakmodel and attn_mask is not None else attn_mask,
|
||||||
head_mask=head_mask[i].to(device) if breakmodel and head_mask[i] is not None else head_mask[i],
|
head_mask=head_mask[i].to(device) if breakmodel and head_mask[i] is not None else head_mask[i],
|
||||||
use_cache=use_cache,
|
use_cache=use_cache,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user