Fix for breakmodel loading to CPU when set to GPU

commit 9e53bcf676
parent f1a16f260f
Author: ebolam
Date:   2023-05-22 20:24:57 -04:00
4 changed files with 17 additions and 5 deletions


@@ -248,11 +248,12 @@ class model_backend(HFTorchInferenceModel):
         self.patch_embedding()

         if utils.koboldai_vars.hascuda:
-            if utils.koboldai_vars.usegpu:
+            if self.usegpu:
                 # Use just VRAM
                 self.model = self.model.half().to(utils.koboldai_vars.gpu_device)
-            elif utils.koboldai_vars.breakmodel:
+            elif self.breakmodel:
                 # Use both RAM and VRAM (breakmodel)
                 if not self.lazy_load:
                     self.breakmodel_device_config(self.model.config)
@@ -267,7 +268,8 @@ class model_backend(HFTorchInferenceModel):
                 self._move_to_devices()
             else:
                 self.model = self.model.to("cpu").float()

         self.model.kai_model = self
         utils.koboldai_vars.modeldim = self.get_hidden_size()
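The change above makes the post-load placement consult the backend's own `self.usegpu` and `self.breakmodel` flags rather than the process-wide `utils.koboldai_vars` ones, which, going by the commit title, could be stale when the UI had selected plain GPU loading, sending the model to the CPU branch instead. A minimal annotated sketch of the dispatch, assuming only the names visible in the hunk (not the complete method body):

    # Device dispatch after model load; a sketch, not the full method.
    if utils.koboldai_vars.hascuda:
        if self.usegpu:
            # Whole model on one GPU: cast to fp16 and move it there.
            self.model = self.model.half().to(utils.koboldai_vars.gpu_device)
        elif self.breakmodel:
            # Split layers between system RAM and one or more GPUs.
            if not self.lazy_load:
                self.breakmodel_device_config(self.model.config)
            self._move_to_devices()
        else:
            # Neither flag set: keep the model on CPU in fp32.
            self.model = self.model.to("cpu").float()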


@@ -158,7 +158,7 @@ class HFInferenceModel(InferenceModel):
                     layers.append(None)
                 else:
                     layers.append(parameters["{}_Layers".format(i)])
-            self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None
+            self.cpu_layers = int(parameters['CPU_Layers']) if 'CPU_Layers' in parameters else None
             if isinstance(self.cpu_layers, str):
                 self.cpu_layers = int(self.cpu_layers) if self.cpu_layers.isnumeric() else 0
             self.layers = layers
@@ -167,9 +167,11 @@ class HFInferenceModel(InferenceModel):
                 self.disk_layers = int(self.disk_layers) if self.disk_layers.isnumeric() else 0
             breakmodel.gpu_blocks = layers
             breakmodel.disk_blocks = self.disk_layers
-            self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None
+            self.usegpu = self.cpu_layers == 0 and breakmodel.disk_blocks == 0 and sum(self.layers)-self.layers[0] == 0
             self.model_type = self.get_model_type()
             self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel
+        else:
+            self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None
         self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id']
         self.path = parameters['path'] if 'path' in parameters else None
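Instead of trusting a `use_gpu` parameter for torch backends, `usegpu` is now derived from the requested layer split itself: it is true only when no layers go to CPU or disk and none land on any GPU after the first. A standalone sketch of the predicate (the helper name `derive_usegpu` is invented for illustration; the real code reads `self.cpu_layers`, `breakmodel.disk_blocks`, and `self.layers`):

    def derive_usegpu(gpu_layers, cpu_layers, disk_layers):
        # True only when the whole model lives on the first GPU:
        # no CPU layers, no disk layers, zero layers on GPUs 1..n.
        return cpu_layers == 0 and disk_layers == 0 and sum(gpu_layers) - gpu_layers[0] == 0

    print(derive_usegpu([32, 0], 0, 0))  # True: plain single-GPU mode
    print(derive_usegpu([24, 8], 0, 0))  # False: split across two GPUs
    print(derive_usegpu([24, 0], 8, 0))  # False: some layers on the CPU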


@@ -126,6 +126,7 @@ class HFTorchInferenceModel(HFInferenceModel):
         return "Unknown"

     def _post_load(m_self) -> None:
         if not utils.koboldai_vars.model_type:
             utils.koboldai_vars.model_type = m_self.get_model_type()
@@ -562,6 +563,7 @@ class HFTorchInferenceModel(HFInferenceModel):
                     )
                 )
                 # print(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ", end="", flush=True)
+                #logger.debug(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ")
                 model_dict[key] = model_dict[key].materialize(
                     f, map_location="cpu"
                 )
@@ -847,6 +849,7 @@ class HFTorchInferenceModel(HFInferenceModel):
         # If all layers are on the same device, use the old GPU generation mode
         while len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0:
             breakmodel.gpu_blocks.pop()
+        self.breakmodel = True
         if len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in (
             -1,
             utils.num_layers(config),
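The hunk at line 847 re-asserts `self.breakmodel = True` immediately before the check that can downgrade to the old single-GPU generation mode, presumably so the flag starts from a known value whichever branch runs. The check itself trims GPUs that received zero layers off the end of the allocation and then asks whether the last remaining entry covers every layer. A minimal sketch of that test (the function name and `num_layers` argument are illustrative; the real code operates on `breakmodel.gpu_blocks` and `utils.num_layers(config)`):

    def collapses_to_single_gpu(gpu_blocks, num_layers):
        # Drop trailing GPUs that were assigned zero layers.
        while gpu_blocks and gpu_blocks[-1] == 0:
            gpu_blocks.pop()
        # If the last allocation holds every layer (-1 being shorthand
        # for "all remaining layers"), the model sits on a single device
        # and the old GPU generation mode applies.
        return bool(gpu_blocks) and gpu_blocks[-1] in (-1, num_layers)

    print(collapses_to_single_gpu([32, 0, 0], 32))  # True
    print(collapses_to_single_gpu([16, 16], 32))    # False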


@@ -2404,4 +2404,9 @@ body.connected .popupfooter, .popupfooter.always-available {
     padding: 5px;
     padding-right: 0px;
     padding-top: 0px;
 }
+
+.input_error {
+    border: 5px solid red !important;
+    box-sizing: border-box !important;
+}