Working(?) breakmodel

Author: ebolam
Date:   2023-05-11 20:40:05 -04:00
Parent: 69d942c00c
Commit: a6f0e97ba0
2 changed files with 29 additions and 26 deletions
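Per the `@@` headers, the two changed files hold the `HFInferenceModel` and `HFTorchInferenceModel` classes. The thread running through the diff is that the `usegpu` and `breakmodel` flags move off the shared `utils.koboldai_vars` settings object and onto the model instance itself. A minimal sketch of that pattern, with an illustrative skeleton rather than the commit's code:

    class HFInferenceModel:
        def __init__(self):
            # Old code consulted utils.koboldai_vars.usegpu / .breakmodel,
            # process-wide globals; each instance now owns its own flags.
            self.usegpu = False
            self.breakmodel = False

        def pick_device(self) -> str:
            # Hypothetical helper, just to show the flags in use.
            return "cuda" if self.usegpu else "cpu"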


@@ -18,6 +18,7 @@ class HFInferenceModel(InferenceModel):
         self.model = None
         self.tokenizer = None
         self.badwordsids = koboldai_settings.badwordsids_default
+        self.usegpu = False

     def is_valid(self, model_name, model_path, menu_path):
         try:
@@ -136,7 +137,7 @@ class HFInferenceModel(InferenceModel):
         self.disk_layers = int(parameters['disk_layers']) if 'disk_layers' in parameters and parameters['disk_layers'].isnumeric() else 0
         breakmodel.gpu_blocks = layers
         breakmodel.disk_blocks = self.disk_layers
-        self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None
+        self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None
         self.model_type = self.get_model_type()
         self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel
         self.model_name = parameters['id']
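The `'key' in parameters` tests above can also be written with `dict.get`; a sketch equivalent to the hunk's parsing (not the commit's code, and it assumes the values arrive as strings, as the `.isnumeric()` check implies):

    # Falls back to 0 when 'disk_layers' is absent or non-numeric, and to
    # None when 'use_gpu' is absent -- the same behavior as the conditionals.
    disk_layers = parameters.get('disk_layers', '')
    self.disk_layers = int(disk_layers) if disk_layers.isnumeric() else 0
    self.usegpu = parameters.get('use_gpu')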


@@ -289,6 +289,7 @@ class HFTorchInferenceModel(HFInferenceModel):
                 raise

             logger.warning(f"Fell back to GPT2LMHeadModel due to {e}")
+            logger.debug(traceback_string)
             try:
                 return GPT2LMHeadModel.from_pretrained(location, **tf_kwargs)
             except Exception as e:
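The added `logger.debug(traceback_string)` logs a name bound earlier in the enclosing `except` block; a sketch of how that presumably fits together (only the variable name appears in the hunk, so the `traceback.format_exc()` call and the failing-loader stand-in are assumptions):

    import logging
    import traceback

    logger = logging.getLogger(__name__)  # stand-in; the repo has its own logger

    def load_primary_model():
        raise RuntimeError("demo failure")  # hypothetical loader that raises

    try:
        model = load_primary_model()
    except Exception as e:
        traceback_string = traceback.format_exc()  # assumed source of the name
        logger.warning(f"Fell back to GPT2LMHeadModel due to {e}")
        logger.debug(traceback_string)  # the line this hunk adds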
@@ -437,10 +438,10 @@ class HFTorchInferenceModel(HFInferenceModel):
             ):
                 device_map[key] = (
                     utils.koboldai_vars.gpu_device
-                    if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu
+                    if utils.koboldai_vars.hascuda and self.usegpu
                     else "cpu"
                     if not utils.koboldai_vars.hascuda
-                    or not utils.koboldai_vars.breakmodel
+                    or not self.breakmodel
                     else breakmodel.primary_device
                 )
             else:
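Chained conditional expressions like the one above read awkwardly; it is exactly equivalent to this unrolled form, using the hunk's own names:

    if utils.koboldai_vars.hascuda and self.usegpu:
        device_map[key] = utils.koboldai_vars.gpu_device
    elif not utils.koboldai_vars.hascuda or not self.breakmodel:
        device_map[key] = "cpu"
    else:
        device_map[key] = breakmodel.primary_device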
@@ -456,12 +457,12 @@ class HFTorchInferenceModel(HFInferenceModel):
                 )
                 device = (
                     utils.koboldai_vars.gpu_device
-                    if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu
+                    if utils.koboldai_vars.hascuda and self.usegpu
                     else "disk"
                     if layer < disk_blocks and layer < ram_blocks
                     else "cpu"
                     if not utils.koboldai_vars.hascuda
-                    or not utils.koboldai_vars.breakmodel
+                    or not self.breakmodel
                     else "shared"
                     if layer < ram_blocks
                     else bisect.bisect_right(
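Unrolled the same way, the per-layer choice above reads as follows; the arguments to `bisect.bisect_right` are cut off in the hunk, so the cumulative block list and the RAM-relative index are assumptions:

    import bisect

    if utils.koboldai_vars.hascuda and self.usegpu:
        device = utils.koboldai_vars.gpu_device   # whole model on one GPU
    elif layer < disk_blocks and layer < ram_blocks:
        device = "disk"                           # earliest layers spill to disk
    elif not utils.koboldai_vars.hascuda or not self.breakmodel:
        device = "cpu"
    elif layer < ram_blocks:
        device = "shared"                         # kept in system RAM
    else:
        # Map the remaining layers onto GPUs via cumulative block counts
        # (hypothetical names; the diff truncates this call).
        device = bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks)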
@@ -566,15 +567,15 @@ class HFTorchInferenceModel(HFInferenceModel):
                 and breakmodel.primary_device != "cpu"
                 and utils.koboldai_vars.hascuda
                 and (
-                    utils.koboldai_vars.breakmodel
-                    or utils.koboldai_vars.usegpu
+                    self.breakmodel
+                    or self.usegpu
                 )
                 and model_dict[key].dtype is torch.float32
             ):
                 model_dict[key] = model_dict[key].to(torch.float16)
             if breakmodel.primary_device == "cpu" or (
-                not utils.koboldai_vars.usegpu
-                and not utils.koboldai_vars.breakmodel
+                not self.usegpu
+                and not self.breakmodel
                 and model_dict[key].dtype is torch.float16
             ):
                 model_dict[key] = model_dict[key].to(torch.float32)
@@ -612,14 +613,14 @@ class HFTorchInferenceModel(HFInferenceModel):
                 and breakmodel.primary_device != "cpu"
                 and utils.koboldai_vars.hascuda
                 and (
-                    utils.koboldai_vars.breakmodel
-                    or utils.koboldai_vars.usegpu
+                    self.breakmodel
+                    or self.usegpu
                 )
             ):
                 dtype = torch.float16
             if breakmodel.primary_device == "cpu" or (
-                not utils.koboldai_vars.usegpu
-                and not utils.koboldai_vars.breakmodel
+                not self.usegpu
+                and not self.breakmodel
             ):
                 dtype = torch.float32
             if (
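This hunk and the previous one (and their repeats at @@ -675 and @@ -723 below) apply one dtype policy: float16 when a CUDA path is active, float32 on pure-CPU paths. A hypothetical helper capturing the main intent (not in the commit; cases where neither branch fires are left unchanged there):

    import torch

    def target_dtype(primary_device: str, hascuda: bool,
                     use_breakmodel: bool, usegpu: bool) -> torch.dtype:
        # float16 halves memory on CUDA; CPU kernels generally want float32.
        gpu_path = primary_device != "cpu" and hascuda and (use_breakmodel or usegpu)
        return torch.float16 if gpu_path else torch.float32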
@@ -675,16 +676,16 @@ class HFTorchInferenceModel(HFInferenceModel):
                 and breakmodel.primary_device != "cpu"
                 and utils.koboldai_vars.hascuda
                 and (
-                    utils.koboldai_vars.breakmodel
-                    or utils.koboldai_vars.usegpu
+                    self.breakmodel
+                    or self.usegpu
                 )
                 and model_dict[key].dtype is torch.float32
             ):
                 model_dict[key] = model_dict[key].to(torch.float16)

             if breakmodel.primary_device == "cpu" or (
-                not utils.koboldai_vars.usegpu
-                and not utils.koboldai_vars.breakmodel
+                not self.usegpu
+                and not self.breakmodel
                 and model_dict[key].dtype is torch.float16
             ):
                 model_dict[key] = model_dict[key].to(torch.float32)
@@ -723,14 +724,14 @@ class HFTorchInferenceModel(HFInferenceModel):
                 and breakmodel.primary_device != "cpu"
                 and utils.koboldai_vars.hascuda
                 and (
-                    utils.koboldai_vars.breakmodel
-                    or utils.koboldai_vars.usegpu
+                    self.breakmodel
+                    or self.usegpu
                 )
             ):
                 dtype = torch.float16
             if breakmodel.primary_device == "cpu" or (
-                not utils.koboldai_vars.usegpu
-                and not utils.koboldai_vars.breakmodel
+                not self.usegpu
+                and not self.breakmodel
             ):
                 dtype = torch.float32
             if (
@@ -764,7 +765,7 @@ class HFTorchInferenceModel(HFInferenceModel):
         if always_use or (
             utils.koboldai_vars.hascuda
             and self.low_mem
-            and (utils.koboldai_vars.usegpu or utils.koboldai_vars.breakmodel)
+            and (self.usegpu or self.breakmodel)
         ):
             original_dtype = torch.get_default_dtype()
             torch.set_default_dtype(torch.float16)
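Changing the process-wide default dtype, as above, is easy to leave in a bad state if loading raises partway; a sketch of the same idea as a self-restoring context manager (an assumption, the commit sets and restores the dtype inline):

    import contextlib

    import torch

    @contextlib.contextmanager
    def default_dtype(dtype: torch.dtype):
        # Temporarily switch torch's default dtype, restoring it even on error.
        original = torch.get_default_dtype()
        torch.set_default_dtype(dtype)
        try:
            yield
        finally:
            torch.set_default_dtype(original)

    # Usage: with default_dtype(torch.float16): model = build_model()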
@@ -956,8 +957,9 @@ class HFTorchInferenceModel(HFInferenceModel):
                 -1,
                 utils.num_layers(config),
             ):
-                utils.koboldai_vars.breakmodel = False
-                utils.koboldai_vars.usegpu = True
+                logger.debug("All layers on same GPU. Breakmodel disabled")
+                self.breakmodel = False
+                self.usegpu = True
                 utils.koboldai_vars.gpu_device = len(breakmodel.gpu_blocks) - 1
                 return
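From the visible context (`-1,`, `utils.num_layers(config),`, `):`), the guard above appears to test whether the last GPU block covers every layer; a reconstruction of the whole branch, where the left-hand side of the membership test is an assumption:

    # gpu_blocks[-1] == -1 means "all remaining layers"; equality with the
    # layer count means the same thing, so plain single-GPU mode suffices.
    if breakmodel.gpu_blocks and breakmodel.gpu_blocks[-1] in (
        -1,
        utils.num_layers(config),
    ):
        logger.debug("All layers on same GPU. Breakmodel disabled")
        self.breakmodel = False
        self.usegpu = True
        utils.koboldai_vars.gpu_device = len(breakmodel.gpu_blocks) - 1
        return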
@@ -966,6 +968,6 @@ class HFTorchInferenceModel(HFInferenceModel):
             import breakmodel

             breakmodel.primary_device = "cpu"
-            utils.koboldai_vars.breakmodel = False
-            utils.koboldai_vars.usegpu = False
+            self.breakmodel = False
+            self.usegpu = False
             return