From a6f0e97ba0ecf17b558e7577834ed9cff964be00 Mon Sep 17 00:00:00 2001
From: ebolam
Date: Thu, 11 May 2023 20:40:05 -0400
Subject: [PATCH] Working(?) breakmodel

---
 modeling/inference_models/parents/hf.py       |  3 +-
 modeling/inference_models/parents/hf_torch.py | 52 ++++++++++---------
 2 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py
index c7a781d7..67fd8b15 100644
--- a/modeling/inference_models/parents/hf.py
+++ b/modeling/inference_models/parents/hf.py
@@ -18,6 +18,7 @@ class HFInferenceModel(InferenceModel):
         self.model = None
         self.tokenizer = None
         self.badwordsids = koboldai_settings.badwordsids_default
+        self.usegpu = False
 
     def is_valid(self, model_name, model_path, menu_path):
         try:
@@ -136,7 +137,7 @@ class HFInferenceModel(InferenceModel):
             self.disk_layers = int(parameters['disk_layers']) if 'disk_layers' in parameters and parameters['disk_layers'].isnumeric() else 0
             breakmodel.gpu_blocks = layers
             breakmodel.disk_blocks = self.disk_layers
-            self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None
+            self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None
             self.model_type = self.get_model_type()
             self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel
             self.model_name = parameters['id']
diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py
index 84c60a6c..d942a572 100644
--- a/modeling/inference_models/parents/hf_torch.py
+++ b/modeling/inference_models/parents/hf_torch.py
@@ -289,6 +289,7 @@ class HFTorchInferenceModel(HFInferenceModel):
                 raise
 
             logger.warning(f"Fell back to GPT2LMHeadModel due to {e}")
+            logger.debug(traceback_string)
             try:
                 return GPT2LMHeadModel.from_pretrained(location, **tf_kwargs)
             except Exception as e:
@@ -437,10 +438,10 @@ class HFTorchInferenceModel(HFInferenceModel):
             ):
                 device_map[key] = (
                     utils.koboldai_vars.gpu_device
-                    if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu
+                    if utils.koboldai_vars.hascuda and self.usegpu
                     else "cpu"
                     if not utils.koboldai_vars.hascuda
-                    or not utils.koboldai_vars.breakmodel
+                    or not self.breakmodel
                     else breakmodel.primary_device
                 )
             else:
@@ -456,12 +457,12 @@ class HFTorchInferenceModel(HFInferenceModel):
                 )
                 device = (
                     utils.koboldai_vars.gpu_device
-                    if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu
+                    if utils.koboldai_vars.hascuda and self.usegpu
                     else "disk"
                     if layer < disk_blocks and layer < ram_blocks
                     else "cpu"
                     if not utils.koboldai_vars.hascuda
-                    or not utils.koboldai_vars.breakmodel
+                    or not self.breakmodel
                     else "shared"
                     if layer < ram_blocks
                     else bisect.bisect_right(
@@ -566,15 +567,15 @@ class HFTorchInferenceModel(HFInferenceModel):
                     and breakmodel.primary_device != "cpu"
                     and utils.koboldai_vars.hascuda
                     and (
-                        utils.koboldai_vars.breakmodel
-                        or utils.koboldai_vars.usegpu
+                        self.breakmodel
+                        or self.usegpu
                     )
                     and model_dict[key].dtype is torch.float32
                 ):
                     model_dict[key] = model_dict[key].to(torch.float16)
                 if breakmodel.primary_device == "cpu" or (
-                    not utils.koboldai_vars.usegpu
-                    and not utils.koboldai_vars.breakmodel
+                    not self.usegpu
+                    and not self.breakmodel
                     and model_dict[key].dtype is torch.float16
                 ):
                     model_dict[key] = model_dict[key].to(torch.float32)
@@ -612,14 +613,14 @@ class HFTorchInferenceModel(HFInferenceModel):
                 and breakmodel.primary_device != "cpu"
                 and utils.koboldai_vars.hascuda
                 and (
-                    utils.koboldai_vars.breakmodel
-                    or utils.koboldai_vars.usegpu
+                    self.breakmodel
+                    or self.usegpu
                 )
             ):
                 dtype = torch.float16
             if breakmodel.primary_device == "cpu" or (
-                not utils.koboldai_vars.usegpu
-                and not utils.koboldai_vars.breakmodel
+                not self.usegpu
+                and not self.breakmodel
             ):
                 dtype = torch.float32
             if (
@@ -675,16 +676,16 @@ class HFTorchInferenceModel(HFInferenceModel):
                     and breakmodel.primary_device != "cpu"
                     and utils.koboldai_vars.hascuda
                     and (
-                        utils.koboldai_vars.breakmodel
-                        or utils.koboldai_vars.usegpu
+                        self.breakmodel
+                        or self.usegpu
                     )
                     and model_dict[key].dtype is torch.float32
                 ):
                     model_dict[key] = model_dict[key].to(torch.float16)
 
                 if breakmodel.primary_device == "cpu" or (
-                    not utils.koboldai_vars.usegpu
-                    and not utils.koboldai_vars.breakmodel
+                    not self.usegpu
+                    and not self.breakmodel
                     and model_dict[key].dtype is torch.float16
                 ):
                     model_dict[key] = model_dict[key].to(torch.float32)
@@ -723,14 +724,14 @@ class HFTorchInferenceModel(HFInferenceModel):
                 and breakmodel.primary_device != "cpu"
                 and utils.koboldai_vars.hascuda
                 and (
-                    utils.koboldai_vars.breakmodel
-                    or utils.koboldai_vars.usegpu
+                    self.breakmodel
+                    or self.usegpu
                 )
             ):
                 dtype = torch.float16
             if breakmodel.primary_device == "cpu" or (
-                not utils.koboldai_vars.usegpu
-                and not utils.koboldai_vars.breakmodel
+                not self.usegpu
+                and not self.breakmodel
             ):
                 dtype = torch.float32
             if (
@@ -764,7 +765,7 @@ class HFTorchInferenceModel(HFInferenceModel):
         if always_use or (
             utils.koboldai_vars.hascuda
             and self.low_mem
-            and (utils.koboldai_vars.usegpu or utils.koboldai_vars.breakmodel)
+            and (self.usegpu or self.breakmodel)
         ):
             original_dtype = torch.get_default_dtype()
             torch.set_default_dtype(torch.float16)
@@ -956,8 +957,9 @@ class HFTorchInferenceModel(HFInferenceModel):
                 -1,
                 utils.num_layers(config),
             ):
-                utils.koboldai_vars.breakmodel = False
-                utils.koboldai_vars.usegpu = True
+                logger.debug("All layers on same GPU. Breakmodel disabled")
+                self.breakmodel = False
+                self.usegpu = True
                 utils.koboldai_vars.gpu_device = len(breakmodel.gpu_blocks) - 1
                 return
@@ -966,6 +968,6 @@ class HFTorchInferenceModel(HFInferenceModel):
         import breakmodel
 
         breakmodel.primary_device = "cpu"
-        utils.koboldai_vars.breakmodel = False
-        utils.koboldai_vars.usegpu = False
+        self.breakmodel = False
+        self.usegpu = False
         return
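
Note (not part of the patch): every hunk above makes the same substitution,
replacing the global utils.koboldai_vars.usegpu / utils.koboldai_vars.breakmodel
flags with the per-instance self.usegpu / self.breakmodel attributes that the
hf.py hunks introduce. For reference, the chained conditional in the
@@ -456,12 +457,12 @@ hunk encodes the layer-placement precedence sketched
below. This is a minimal illustrative sketch only: the class and argument
names are assumptions, and the real code resolves the concrete GPU index with
bisect.bisect_right(breakmodel.gpu_blocks, layer - ram_blocks) rather than
returning a fixed device string.

    # Illustrative sketch; names other than usegpu/breakmodel are assumed.
    class PlacementSketch:
        def __init__(self, usegpu: bool, breakmodel: bool):
            self.usegpu = usegpu          # the patch's new instance flag
            self.breakmodel = breakmodel  # the patch's new instance flag

        def pick_device(self, has_cuda: bool, layer: int,
                        ram_blocks: int, disk_blocks: int) -> str:
            if has_cuda and self.usegpu:
                return "cuda:0"  # whole model on one GPU; breakmodel bypassed
            if layer < disk_blocks and layer < ram_blocks:
                return "disk"    # layer streamed from disk on demand
            if not has_cuda or not self.breakmodel:
                return "cpu"     # no CUDA, or layer splitting disabled
            if layer < ram_blocks:
                return "shared"  # layer held in shared system RAM
            # Otherwise a breakmodel GPU slot; the real code picks the index
            # via bisect over breakmodel.gpu_blocks.
            return "cuda:0"

The dtype hunks follow the same split: weights are cast to torch.float16 when
the primary device is a GPU and (self.breakmodel or self.usegpu) holds, and
kept in torch.float32 when everything stays on the CPU.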