mirror of
https://github.com/KoboldAI/KoboldAI-Client.git
synced 2025-06-05 21:59:24 +02:00
Fix for loading model multiple times losing the gpu/cpu splits
This commit is contained in:
@@ -197,12 +197,6 @@ class HFInferenceModel(InferenceModel):
                torch.cuda.empty_cache()
            except:
                pass
        if self.hf_torch:
            if 'breakmodel' in sys.modules:
                import breakmodel
                breakmodel.breakmodel = True
                breakmodel.gpu_blocks = []
                breakmodel.disk_blocks = 0

    def _post_load(self) -> None:
        # These are model specific tokenizer overrides if a model has bad defaults
@@ -788,6 +788,7 @@ class HFTorchInferenceModel(HFInferenceModel):
        if device_count < 2:
            primary = None
        logger.debug("n_layers: {}".format(n_layers))
        logger.debug("gpu blocks: {}".format(breakmodel.gpu_blocks))
        gpu_blocks = breakmodel.gpu_blocks + (
            device_count - len(breakmodel.gpu_blocks)
        ) * [0]
@@ -818,6 +819,8 @@ class HFTorchInferenceModel(HFInferenceModel):

        n_layers = utils.num_layers(config)

        logger.debug("gpu blocks before modification: {}".format(breakmodel.gpu_blocks))

        if utils.args.cpu:
            breakmodel.gpu_blocks = [0] * n_layers
            return
Reference in New Issue
Block a user