Mirror of https://github.com/KoboldAI/KoboldAI-Client.git, synced 2025-06-05 21:59:24 +02:00
Merge branch 'Model_Plugins' of https://github.com/ebolam/KoboldAI into Model_Plugins
@@ -248,11 +248,12 @@ class model_backend(HFTorchInferenceModel):
+
         self.patch_embedding()
 
 
         if utils.koboldai_vars.hascuda:
-            if utils.koboldai_vars.usegpu:
+            if self.usegpu:
                 # Use just VRAM
                 self.model = self.model.half().to(utils.koboldai_vars.gpu_device)
-            elif utils.koboldai_vars.breakmodel:
+            elif self.breakmodel:
                 # Use both RAM and VRAM (breakmodel)
                 if not self.lazy_load:
                     self.breakmodel_device_config(self.model.config)
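Note on the hunk above: device placement now reads the backend's own self.usegpu / self.breakmodel attributes instead of the global utils.koboldai_vars flags, so each loaded backend carries its own placement decision. A minimal sketch of the resulting branch, assuming a backend object with the attributes shown in this diff (names outside the diff are illustrative, not the project's API):

# Sketch only: 'backend' stands in for the model_backend instance and is
# assumed to expose .usegpu, .breakmodel, .lazy_load and .model as in the diff.
def place_model(backend, has_cuda, gpu_device="cuda:0"):
    if has_cuda:
        if backend.usegpu:
            # Single-GPU path: cast to fp16 and keep everything in VRAM.
            backend.model = backend.model.half().to(gpu_device)
        elif backend.breakmodel:
            # Hybrid RAM/VRAM path (breakmodel): configure the device split.
            if not backend.lazy_load:
                backend.breakmodel_device_config(backend.model.config)
    else:
        # No CUDA available: keep the model on CPU in fp32.
        backend.model = backend.model.to("cpu").float()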
@@ -268,6 +269,7 @@ class model_backend(HFTorchInferenceModel):
         else:
             self.model = self.model.to("cpu").float()
 
 
         self.model.kai_model = self
         utils.koboldai_vars.modeldim = self.get_hidden_size()
+
@@ -158,7 +158,7 @@ class HFInferenceModel(InferenceModel):
                     layers.append(None)
                 else:
                     layers.append(parameters["{}_Layers".format(i)])
-            self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None
+            self.cpu_layers = int(parameters['CPU_Layers']) if 'CPU_Layers' in parameters else None
             if isinstance(self.cpu_layers, str):
                 self.cpu_layers = int(self.cpu_layers) if self.cpu_layers.isnumeric() else 0
             self.layers = layers
@@ -167,9 +167,11 @@ class HFInferenceModel(InferenceModel):
                 self.disk_layers = int(self.disk_layers) if self.disk_layers.isnumeric() else 0
             breakmodel.gpu_blocks = layers
             breakmodel.disk_blocks = self.disk_layers
-            self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None
+            self.usegpu = self.cpu_layers == 0 and breakmodel.disk_blocks == 0 and sum(self.layers)-self.layers[0] == 0
             self.model_type = self.get_model_type()
             self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel
+        else:
+            self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None
         self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id']
         self.path = parameters['path'] if 'path' in parameters else None
 
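Note on the hunk above: when torch breakmodel is available, usegpu is no longer taken from the request parameters; it is derived from the layer split and is true only when no layers go to CPU, none to disk, and every GPU layer sits on the first GPU (sum(self.layers) - self.layers[0] == 0). An illustrative check under the assumption of an all-integer split (the real list may contain None entries for unset devices):

# Illustrative only: 'layers' is the per-GPU layer split from the UI sliders.
def derive_usegpu(layers, cpu_layers, disk_blocks):
    # True only when everything lives on GPU 0: no CPU layers, no disk
    # blocks, and no layers assigned to any GPU other than the first.
    return cpu_layers == 0 and disk_blocks == 0 and sum(layers) - layers[0] == 0

print(derive_usegpu([32, 0], 0, 0))   # True  -> plain single-GPU generation
print(derive_usegpu([16, 16], 0, 0))  # False -> breakmodel split across GPUs
print(derive_usegpu([24, 0], 8, 0))   # False -> some layers offloaded to CPU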
@@ -195,12 +197,6 @@ class HFInferenceModel(InferenceModel):
                 torch.cuda.empty_cache()
             except:
                 pass
-        if self.hf_torch:
-            if 'breakmodel' in sys.modules:
-                import breakmodel
-                breakmodel.breakmodel = True
-                breakmodel.gpu_blocks = []
-                breakmodel.disk_blocks = 0
 
     def _post_load(self) -> None:
         # These are model specific tokenizer overrides if a model has bad defaults
@@ -126,6 +126,7 @@ class HFTorchInferenceModel(HFInferenceModel):
         return "Unknown"
 
     def _post_load(m_self) -> None:
+
         if not utils.koboldai_vars.model_type:
             utils.koboldai_vars.model_type = m_self.get_model_type()
 
@@ -562,6 +563,7 @@ class HFTorchInferenceModel(HFInferenceModel):
                     )
                 )
                 # print(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ", end="", flush=True)
+                #logger.debug(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ")
                 model_dict[key] = model_dict[key].materialize(
                     f, map_location="cpu"
                 )
@@ -786,6 +788,7 @@ class HFTorchInferenceModel(HFInferenceModel):
         if device_count < 2:
             primary = None
         logger.debug("n_layers: {}".format(n_layers))
+        logger.debug("gpu blocks: {}".format(breakmodel.gpu_blocks))
         gpu_blocks = breakmodel.gpu_blocks + (
             device_count - len(breakmodel.gpu_blocks)
         ) * [0]
@@ -816,6 +819,8 @@ class HFTorchInferenceModel(HFInferenceModel):
 
         n_layers = utils.num_layers(config)
 
+        logger.debug("gpu blocks before modification: {}".format(breakmodel.gpu_blocks))
+
         if utils.args.cpu:
             breakmodel.gpu_blocks = [0] * n_layers
             return
@@ -847,6 +852,7 @@ class HFTorchInferenceModel(HFTorchInferenceModel):
         # If all layers are on the same device, use the old GPU generation mode
         while len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0:
             breakmodel.gpu_blocks.pop()
+        self.breakmodel = True
         if len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in (
             -1,
             utils.num_layers(config),
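Note on the hunk above: self.breakmodel = True records that a layer split is in effect before the single-device check that follows. That check (last remaining gpu_blocks entry is -1 or the full layer count) detects the case where one GPU takes every layer, in which case the old plain GPU generation mode can presumably be used instead; the branch body is not visible in this hunk, so the sketch below only restates the visible condition:

# Sketch: gpu_blocks is the per-GPU layer split, n_layers = utils.num_layers(config).
def fits_on_single_gpu(gpu_blocks, n_layers):
    # Drop trailing GPUs that received no layers.
    while gpu_blocks and gpu_blocks[-1] == 0:
        gpu_blocks.pop()
    # -1 means "all remaining layers"; matching the total layer count means
    # the last listed GPU holds the whole model, so breakmodel is unnecessary.
    return bool(gpu_blocks) and gpu_blocks[-1] in (-1, n_layers)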
@@ -2405,3 +2405,8 @@ body.connected .popupfooter, .popupfooter.always-available {
     padding-right: 0px;
     padding-top: 0px;
 }
+
+.input_error {
+    border: 5px solid red !important;
+    box-sizing: border-box !important;
+}