Working(?) breakmodel

ebolam
2023-05-11 20:40:05 -04:00
parent 69d942c00c
commit a6f0e97ba0
2 changed files with 29 additions and 26 deletions

@@ -18,6 +18,7 @@ class HFInferenceModel(InferenceModel):
         self.model = None
         self.tokenizer = None
         self.badwordsids = koboldai_settings.badwordsids_default
+        self.usegpu = False
 
     def is_valid(self, model_name, model_path, menu_path):
         try:
@@ -136,7 +137,7 @@ class HFInferenceModel(InferenceModel):
         self.disk_layers = int(parameters['disk_layers']) if 'disk_layers' in parameters and parameters['disk_layers'].isnumeric() else 0
         breakmodel.gpu_blocks = layers
         breakmodel.disk_blocks = self.disk_layers
-        self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None
+        self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None
         self.model_type = self.get_model_type()
         self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel
         self.model_name = parameters['id']
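
Taken together, the two hunks above fix an attribute-name mismatch: __init__ now initializes self.usegpu, and set_input_parameters writes to self.usegpu instead of the old self.use_gpu, so the flag the torch backend reads is the one that actually gets set. A minimal sketch of the intended flow (standalone, with a plain parameters dict standing in for the real request data):

    # Minimal sketch, not the real class: shows why the rename matters.
    class HFInferenceModel:
        def __init__(self):
            self.usegpu = False  # default added by this commit

        def set_input_parameters(self, parameters):
            # Before this commit the value landed in self.use_gpu, so
            # self.usegpu (read during loading) stayed at its default.
            self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None

    model = HFInferenceModel()
    model.set_input_parameters({'use_gpu': True})
    assert model.usegpu is True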

@@ -289,6 +289,7 @@ class HFTorchInferenceModel(HFInferenceModel):
                 raise
             logger.warning(f"Fell back to GPT2LMHeadModel due to {e}")
             logger.debug(traceback_string)
             try:
                 return GPT2LMHeadModel.from_pretrained(location, **tf_kwargs)
             except Exception as e:
@@ -437,10 +438,10 @@ class HFTorchInferenceModel(HFInferenceModel):
             ):
                 device_map[key] = (
                     utils.koboldai_vars.gpu_device
-                    if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu
+                    if utils.koboldai_vars.hascuda and self.usegpu
                     else "cpu"
                     if not utils.koboldai_vars.hascuda
-                    or not utils.koboldai_vars.breakmodel
+                    or not self.breakmodel
                     else breakmodel.primary_device
                 )
             else:
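
The chained conditional expression above is compact but hard to scan; unrolled, the device choice for a device_map entry reads as follows (a sketch with plain arguments standing in for the utils.koboldai_vars and self attributes):

    def pick_device(hascuda, usegpu, breakmodel_on, gpu_device, primary_device):
        # Equivalent of the chained ternary above, written out.
        if hascuda and usegpu:
            return gpu_device         # whole model on one GPU
        if not hascuda or not breakmodel_on:
            return "cpu"              # no CUDA, or layer splitting disabled
        return primary_device         # breakmodel: keep on the primary GPU
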
@@ -456,12 +457,12 @@ class HFTorchInferenceModel(HFInferenceModel):
                 )
                 device = (
                     utils.koboldai_vars.gpu_device
-                    if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu
+                    if utils.koboldai_vars.hascuda and self.usegpu
                     else "disk"
                     if layer < disk_blocks and layer < ram_blocks
                     else "cpu"
                     if not utils.koboldai_vars.hascuda
-                    or not utils.koboldai_vars.breakmodel
+                    or not self.breakmodel
                     else "shared"
                     if layer < ram_blocks
                     else bisect.bisect_right(
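
The per-layer variant above adds two more tiers, and its bisect call is cut off at the hunk boundary. A sketch of the full decision, assuming the bisect searches running totals of breakmodel.gpu_blocks (the usual breakmodel layout: lowest layers on disk, then CPU RAM, then a slice of layers per GPU):

    import bisect
    from itertools import accumulate

    def layer_device(layer, disk_blocks, ram_blocks, gpu_blocks,
                     hascuda=True, usegpu=False, breakmodel_on=True,
                     gpu_device=0):
        if hascuda and usegpu:
            return gpu_device
        if layer < disk_blocks and layer < ram_blocks:
            return "disk"                 # lowest layers spill to disk
        if not hascuda or not breakmodel_on:
            return "cpu"
        if layer < ram_blocks:
            return "shared"               # held in CPU RAM, streamed to GPU
        # Bisecting the running totals maps a layer index to the GPU
        # whose block allocation contains it.
        cumulative = list(accumulate(gpu_blocks))
        return bisect.bisect_right(cumulative, layer - ram_blocks)

    # e.g. gpu_blocks=[4, 8], ram_blocks=2: layer 5 -> GPU 0, layer 6 -> GPU 1
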
@@ -566,15 +567,15 @@ class HFTorchInferenceModel(HFInferenceModel):
                     and breakmodel.primary_device != "cpu"
                     and utils.koboldai_vars.hascuda
                     and (
-                        utils.koboldai_vars.breakmodel
-                        or utils.koboldai_vars.usegpu
+                        self.breakmodel
+                        or self.usegpu
                     )
                     and model_dict[key].dtype is torch.float32
                 ):
                     model_dict[key] = model_dict[key].to(torch.float16)
                 if breakmodel.primary_device == "cpu" or (
-                    not utils.koboldai_vars.usegpu
-                    and not utils.koboldai_vars.breakmodel
+                    not self.usegpu
+                    and not self.breakmodel
                     and model_dict[key].dtype is torch.float16
                 ):
                     model_dict[key] = model_dict[key].to(torch.float32)
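
This hunk and the three that follow apply the same dtype policy in several load paths: cast fp32 weights down to fp16 whenever a CUDA device will hold them, and cast fp16 weights back up to fp32 for a pure-CPU run. Roughly, as a sketch:

    import torch

    def normalize_dtype(tensor, hascuda, usegpu, breakmodel_on, primary_device):
        # fp16 on GPU halves memory; fp16 on CPU is slow or unsupported,
        # so CPU-only runs get fp32 back.
        use_cuda = primary_device != "cpu" and hascuda and (breakmodel_on or usegpu)
        if use_cuda and tensor.dtype is torch.float32:
            return tensor.to(torch.float16)
        cpu_only = primary_device == "cpu" or (not usegpu and not breakmodel_on)
        if cpu_only and tensor.dtype is torch.float16:
            return tensor.to(torch.float32)
        return tensor
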
@@ -612,14 +613,14 @@ class HFTorchInferenceModel(HFInferenceModel):
                 and breakmodel.primary_device != "cpu"
                 and utils.koboldai_vars.hascuda
                 and (
-                    utils.koboldai_vars.breakmodel
-                    or utils.koboldai_vars.usegpu
+                    self.breakmodel
+                    or self.usegpu
                 )
             ):
                 dtype = torch.float16
             if breakmodel.primary_device == "cpu" or (
-                not utils.koboldai_vars.usegpu
-                and not utils.koboldai_vars.breakmodel
+                not self.usegpu
+                and not self.breakmodel
             ):
                 dtype = torch.float32
             if (
@@ -675,16 +676,16 @@ class HFTorchInferenceModel(HFInferenceModel):
                     and breakmodel.primary_device != "cpu"
                     and utils.koboldai_vars.hascuda
                     and (
-                        utils.koboldai_vars.breakmodel
-                        or utils.koboldai_vars.usegpu
+                        self.breakmodel
+                        or self.usegpu
                     )
                     and model_dict[key].dtype is torch.float32
                 ):
                     model_dict[key] = model_dict[key].to(torch.float16)
                 if breakmodel.primary_device == "cpu" or (
-                    not utils.koboldai_vars.usegpu
-                    and not utils.koboldai_vars.breakmodel
+                    not self.usegpu
+                    and not self.breakmodel
                     and model_dict[key].dtype is torch.float16
                 ):
                     model_dict[key] = model_dict[key].to(torch.float32)
@@ -723,14 +724,14 @@ class HFTorchInferenceModel(HFInferenceModel):
                 and breakmodel.primary_device != "cpu"
                 and utils.koboldai_vars.hascuda
                 and (
-                    utils.koboldai_vars.breakmodel
-                    or utils.koboldai_vars.usegpu
+                    self.breakmodel
+                    or self.usegpu
                 )
             ):
                 dtype = torch.float16
             if breakmodel.primary_device == "cpu" or (
-                not utils.koboldai_vars.usegpu
-                and not utils.koboldai_vars.breakmodel
+                not self.usegpu
+                and not self.breakmodel
             ):
                 dtype = torch.float32
             if (
@@ -764,7 +765,7 @@ class HFTorchInferenceModel(HFInferenceModel):
         if always_use or (
             utils.koboldai_vars.hascuda
             and self.low_mem
-            and (utils.koboldai_vars.usegpu or utils.koboldai_vars.breakmodel)
+            and (self.usegpu or self.breakmodel)
         ):
             original_dtype = torch.get_default_dtype()
             torch.set_default_dtype(torch.float16)
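
The last two context lines save the current torch default dtype and switch it to fp16 so low-memory loads allocate half-precision tensors; the restore happens later, outside this hunk. The same save/set/restore pattern can be written as a context manager, sketched here:

    import contextlib
    import torch

    @contextlib.contextmanager
    def default_dtype(dtype):
        original_dtype = torch.get_default_dtype()
        torch.set_default_dtype(dtype)
        try:
            yield
        finally:
            # restore even if loading raises
            torch.set_default_dtype(original_dtype)

    # usage: with default_dtype(torch.float16): load the checkpoint
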
@@ -956,8 +957,9 @@ class HFTorchInferenceModel(HFInferenceModel):
                 -1,
                 utils.num_layers(config),
             ):
-                utils.koboldai_vars.breakmodel = False
-                utils.koboldai_vars.usegpu = True
+                logger.debug("All layers on same GPU. Breakmodel disabled")
+                self.breakmodel = False
+                self.usegpu = True
                 utils.koboldai_vars.gpu_device = len(breakmodel.gpu_blocks) - 1
                 return
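
The surrounding condition (its opening lines sit above this hunk) checks whether the user assigned every layer to a single GPU, with -1 as the usual "all remaining layers" sentinel. Splitting buys nothing in that case, so the commit logs it, turns breakmodel off on the instance, and falls back to plain single-GPU mode. The test reduces to something like:

    def all_layers_on_one_gpu(gpu_blocks, num_layers):
        # Sketch of the check above: one GPU entry covering every layer
        # (-1 is assumed to mean "all remaining layers").
        return len(gpu_blocks) == 1 and gpu_blocks[0] in (-1, num_layers)
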
@@ -966,6 +968,6 @@ class HFTorchInferenceModel(HFInferenceModel):
         import breakmodel
 
         breakmodel.primary_device = "cpu"
-        utils.koboldai_vars.breakmodel = False
-        utils.koboldai_vars.usegpu = False
+        self.breakmodel = False
+        self.usegpu = False
         return