Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00)

Commit: Working(?) breakmodel
@@ -18,6 +18,7 @@ class HFInferenceModel(InferenceModel):
         self.model = None
         self.tokenizer = None
         self.badwordsids = koboldai_settings.badwordsids_default
+        self.usegpu = False

     def is_valid(self, model_name, model_path, menu_path):
         try:
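
Taken together, the hunks in this commit move the usegpu/breakmodel decisions off the global utils.koboldai_vars state and onto the model instance; this first one seeds the per-instance default. A minimal sketch of the idea, assuming nothing beyond what the diff shows (the class name below is illustrative):

    class InstanceFlags:
        """Per-instance device flags instead of shared globals."""

        def __init__(self):
            self.model = None
            self.tokenizer = None
            self.usegpu = False      # run the whole model on one GPU
            self.breakmodel = False  # split layers across GPU / CPU / disk
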
@@ -136,7 +137,7 @@ class HFInferenceModel(InferenceModel):
         self.disk_layers = int(parameters['disk_layers']) if 'disk_layers' in parameters and parameters['disk_layers'].isnumeric() else 0
         breakmodel.gpu_blocks = layers
         breakmodel.disk_blocks = self.disk_layers
-        self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None
+        self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None
         self.model_type = self.get_model_type()
         self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel
         self.model_name = parameters['id']
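
The functional change here is the attribute name: the value read from parameters was previously stored as self.use_gpu while the rest of the class checks self.usegpu, so the chosen setting never took effect. A small sketch of the same lookups using dict.get, assuming parameters is a plain dict with string values as the isnumeric() check implies:

    def read_device_params(parameters):
        """Illustrative stand-in for the parameter parsing in this hunk."""
        usegpu = parameters.get('use_gpu')             # None when absent, as in the diff
        raw_disk = parameters.get('disk_layers', '')
        disk_layers = int(raw_disk) if raw_disk.isnumeric() else 0
        return usegpu, disk_layers

    # read_device_params({'use_gpu': True, 'disk_layers': '4'}) -> (True, 4)
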
@@ -289,6 +289,7 @@ class HFTorchInferenceModel(HFInferenceModel):
                 raise

             logger.warning(f"Fell back to GPT2LMHeadModel due to {e}")
+            logger.debug(traceback_string)
             try:
                 return GPT2LMHeadModel.from_pretrained(location, **tf_kwargs)
             except Exception as e:
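
The added line logs the previously captured traceback at debug level before the GPT-2 fallback is attempted, so the original failure stays available without flooding the warning channel. A self-contained sketch of that pattern; AutoModelForCausalLM as the first-choice loader is an assumption here, only the GPT2LMHeadModel fallback and the two log calls appear in the diff:

    import logging
    import traceback
    from transformers import AutoModelForCausalLM, GPT2LMHeadModel

    logger = logging.getLogger(__name__)

    def load_with_fallback(location, **tf_kwargs):
        try:
            return AutoModelForCausalLM.from_pretrained(location, **tf_kwargs)
        except Exception as e:
            traceback_string = traceback.format_exc()
            logger.warning(f"Fell back to GPT2LMHeadModel due to {e}")
            logger.debug(traceback_string)  # full traceback only when debug logging is on
            return GPT2LMHeadModel.from_pretrained(location, **tf_kwargs)
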
@@ -437,10 +438,10 @@ class HFTorchInferenceModel(HFInferenceModel):
                 ):
                     device_map[key] = (
                         utils.koboldai_vars.gpu_device
-                        if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu
+                        if utils.koboldai_vars.hascuda and self.usegpu
                         else "cpu"
                         if not utils.koboldai_vars.hascuda
-                        or not utils.koboldai_vars.breakmodel
+                        or not self.breakmodel
                         else breakmodel.primary_device
                     )
                 else:
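
Both edits in this hunk point the device choice for non-layer weights at the instance flags. Unrolling the chained conditional expression into a plain function makes the precedence easier to read; the argument names below are stand-ins for utils.koboldai_vars.*, self.usegpu, self.breakmodel and breakmodel.primary_device:

    def pick_weight_device(hascuda, usegpu, breakmodel_enabled, gpu_device, primary_device):
        """Same decision as the conditional that fills device_map[key]."""
        if hascuda and usegpu:
            return gpu_device        # whole model on the selected GPU
        if not hascuda or not breakmodel_enabled:
            return "cpu"             # no CUDA, or layer splitting disabled
        return primary_device        # breakmodel's primary GPU

    # pick_weight_device(True, False, True, 0, "cuda:0") -> "cuda:0"
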
@@ -456,12 +457,12 @@ class HFTorchInferenceModel(HFInferenceModel):
                     )
                     device = (
                         utils.koboldai_vars.gpu_device
-                        if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu
+                        if utils.koboldai_vars.hascuda and self.usegpu
                         else "disk"
                         if layer < disk_blocks and layer < ram_blocks
                         else "cpu"
                         if not utils.koboldai_vars.hascuda
-                        or not utils.koboldai_vars.breakmodel
+                        or not self.breakmodel
                         else "shared"
                         if layer < ram_blocks
                         else bisect.bisect_right(
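
This is the per-layer version of the same choice, again switched to the instance flags. A sketch of it unrolled; the bisect.bisect_right arguments are truncated in the diff, so gpu_cumsum and the offset passed to it here are only a guess at the shape of that lookup:

    import bisect

    def pick_layer_device(layer, hascuda, usegpu, breakmodel_enabled,
                          gpu_device, disk_blocks, ram_blocks, gpu_cumsum):
        """Per-layer device choice as it reads after this hunk (illustrative)."""
        if hascuda and usegpu:
            return gpu_device                     # single-GPU mode wins outright
        if layer < disk_blocks and layer < ram_blocks:
            return "disk"                         # earliest layers spill to the disk cache
        if not hascuda or not breakmodel_enabled:
            return "cpu"
        if layer < ram_blocks:
            return "shared"                       # kept in system RAM
        # remaining layers map to a GPU index via cumulative block counts (guessed arguments)
        return bisect.bisect_right(gpu_cumsum, layer - ram_blocks)
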
@@ -566,15 +567,15 @@ class HFTorchInferenceModel(HFInferenceModel):
                     and breakmodel.primary_device != "cpu"
                     and utils.koboldai_vars.hascuda
                     and (
-                        utils.koboldai_vars.breakmodel
-                        or utils.koboldai_vars.usegpu
+                        self.breakmodel
+                        or self.usegpu
                     )
                     and model_dict[key].dtype is torch.float32
                 ):
                     model_dict[key] = model_dict[key].to(torch.float16)
                 if breakmodel.primary_device == "cpu" or (
-                    not utils.koboldai_vars.usegpu
-                    and not utils.koboldai_vars.breakmodel
+                    not self.usegpu
+                    and not self.breakmodel
                     and model_dict[key].dtype is torch.float16
                 ):
                     model_dict[key] = model_dict[key].to(torch.float32)
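
Here the flag swap decides how each checkpoint tensor is cast while it is materialised: float32 weights get halved when they are headed for a GPU, float16 weights get widened when they will stay on the CPU. A minimal sketch of just that cast; on_gpu stands in for the full primary_device/hascuda/flag check in the diff:

    import torch

    def normalize_weight_dtype(tensor, on_gpu):
        """Cast a single checkpoint tensor the way this hunk does for model_dict[key]."""
        if on_gpu and tensor.dtype is torch.float32:
            return tensor.to(torch.float16)   # halve memory for GPU-resident weights
        if not on_gpu and tensor.dtype is torch.float16:
            return tensor.to(torch.float32)   # CPU kernels generally want full precision
        return tensor
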
@@ -612,14 +613,14 @@ class HFTorchInferenceModel(HFInferenceModel):
                     and breakmodel.primary_device != "cpu"
                     and utils.koboldai_vars.hascuda
                     and (
-                        utils.koboldai_vars.breakmodel
-                        or utils.koboldai_vars.usegpu
+                        self.breakmodel
+                        or self.usegpu
                     )
                 ):
                     dtype = torch.float16
                 if breakmodel.primary_device == "cpu" or (
-                    not utils.koboldai_vars.usegpu
-                    and not utils.koboldai_vars.breakmodel
+                    not self.usegpu
+                    and not self.breakmodel
                 ):
                     dtype = torch.float32
                 if (
@@ -675,16 +676,16 @@ class HFTorchInferenceModel(HFInferenceModel):
                     and breakmodel.primary_device != "cpu"
                     and utils.koboldai_vars.hascuda
                     and (
-                        utils.koboldai_vars.breakmodel
-                        or utils.koboldai_vars.usegpu
+                        self.breakmodel
+                        or self.usegpu
                     )
                     and model_dict[key].dtype is torch.float32
                 ):
                     model_dict[key] = model_dict[key].to(torch.float16)

                 if breakmodel.primary_device == "cpu" or (
-                    not utils.koboldai_vars.usegpu
-                    and not utils.koboldai_vars.breakmodel
+                    not self.usegpu
+                    and not self.breakmodel
                     and model_dict[key].dtype is torch.float16
                 ):
                     model_dict[key] = model_dict[key].to(torch.float32)
@@ -723,14 +724,14 @@ class HFTorchInferenceModel(HFInferenceModel):
                     and breakmodel.primary_device != "cpu"
                     and utils.koboldai_vars.hascuda
                     and (
-                        utils.koboldai_vars.breakmodel
-                        or utils.koboldai_vars.usegpu
+                        self.breakmodel
+                        or self.usegpu
                     )
                 ):
                     dtype = torch.float16
                 if breakmodel.primary_device == "cpu" or (
-                    not utils.koboldai_vars.usegpu
-                    and not utils.koboldai_vars.breakmodel
+                    not self.usegpu
+                    and not self.breakmodel
                 ):
                     dtype = torch.float32
                 if (
@@ -764,7 +765,7 @@ class HFTorchInferenceModel(HFInferenceModel):
         if always_use or (
             utils.koboldai_vars.hascuda
             and self.low_mem
-            and (utils.koboldai_vars.usegpu or utils.koboldai_vars.breakmodel)
+            and (self.usegpu or self.breakmodel)
         ):
             original_dtype = torch.get_default_dtype()
             torch.set_default_dtype(torch.float16)
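
The low-memory path temporarily switches torch's default dtype to float16 while the model is instantiated and restores it afterwards; the hunk only changes which flags gate it. A sketch of that save/restore written as a context manager (the context-manager form is a suggestion, not how the file is structured):

    import contextlib
    import torch

    @contextlib.contextmanager
    def default_dtype(dtype):
        """Temporarily change torch's default dtype, always restoring the original."""
        original_dtype = torch.get_default_dtype()
        torch.set_default_dtype(dtype)
        try:
            yield
        finally:
            torch.set_default_dtype(original_dtype)

    # with default_dtype(torch.float16):
    #     model = SomeModel()  # parameters created inside default to float16
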
@@ -956,8 +957,9 @@ class HFTorchInferenceModel(HFInferenceModel):
                 -1,
                 utils.num_layers(config),
             ):
-                utils.koboldai_vars.breakmodel = False
-                utils.koboldai_vars.usegpu = True
+                logger.debug("All layers on same GPU. Breakmodel disabled")
+                self.breakmodel = False
+                self.usegpu = True
                 utils.koboldai_vars.gpu_device = len(breakmodel.gpu_blocks) - 1
                 return

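
When the configured GPU blocks already cover every layer (the -1 / utils.num_layers(config) membership test that opens the hunk), splitting buys nothing, so the instance drops breakmodel and switches to plain single-GPU mode, with a debug line replacing the old silent global writes. A sketch of that shortcut; the surrounding condition is mostly outside the hunk, so the gpu_blocks[-1] test below is an assumption:

    def collapse_to_single_gpu(model, gpu_blocks, total_layers):
        """Illustrative version of the single-GPU shortcut after this hunk."""
        if gpu_blocks and gpu_blocks[-1] in (-1, total_layers):
            # Every layer lands on the last configured GPU: no point splitting.
            model.breakmodel = False         # per-instance flags, not the globals
            model.usegpu = True
            return len(gpu_blocks) - 1       # index of the GPU that hosts the model
        return None
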
@@ -966,6 +968,6 @@ class HFTorchInferenceModel(HFInferenceModel):
             import breakmodel

             breakmodel.primary_device = "cpu"
-            utils.koboldai_vars.breakmodel = False
-            utils.koboldai_vars.usegpu = False
+            self.breakmodel = False
+            self.usegpu = False
             return
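
The final hunk is the CPU-only fallback: breakmodel's primary device is still set at module level, but the mode flags now live on the instance. A tiny runnable sketch of that split in ownership (the breakmodel module is stubbed with SimpleNamespace here):

    from types import SimpleNamespace

    breakmodel = SimpleNamespace(primary_device=None)   # stand-in for the real module

    class ModelStub:
        def fall_back_to_cpu(self):
            breakmodel.primary_device = "cpu"   # device routing stays module-level
            self.breakmodel = False             # mode flags are per instance now
            self.usegpu = False

    m = ModelStub()
    m.fall_back_to_cpu()
    assert breakmodel.primary_device == "cpu" and not m.usegpu and not m.breakmodel
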