From a6f0e97ba0ecf17b558e7577834ed9cff964be00 Mon Sep 17 00:00:00 2001
From: ebolam
Date: Thu, 11 May 2023 20:40:05 -0400
Subject: [PATCH] Working(?) breakmodel

---
 modeling/inference_models/parents/hf.py       |  3 +-
 modeling/inference_models/parents/hf_torch.py | 52 ++++++++++---------
 2 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py
index c7a781d7..67fd8b15 100644
--- a/modeling/inference_models/parents/hf.py
+++ b/modeling/inference_models/parents/hf.py
@@ -18,6 +18,7 @@ class HFInferenceModel(InferenceModel):
         self.model = None
         self.tokenizer = None
         self.badwordsids = koboldai_settings.badwordsids_default
+        self.usegpu = False
 
     def is_valid(self, model_name, model_path, menu_path):
         try:
@@ -136,7 +137,7 @@ class HFInferenceModel(InferenceModel):
             self.disk_layers = int(parameters['disk_layers']) if 'disk_layers' in parameters and parameters['disk_layers'].isnumeric() else 0
             breakmodel.gpu_blocks = layers
             breakmodel.disk_blocks = self.disk_layers
-            self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None
+            self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None
             self.model_type = self.get_model_type()
             self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel
             self.model_name = parameters['id']
diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py
index 84c60a6c..d942a572 100644
--- a/modeling/inference_models/parents/hf_torch.py
+++ b/modeling/inference_models/parents/hf_torch.py
@@ -289,6 +289,7 @@ class HFTorchInferenceModel(HFInferenceModel):
                 raise
 
             logger.warning(f"Fell back to GPT2LMHeadModel due to {e}")
+            logger.debug(traceback_string)
             try:
                 return GPT2LMHeadModel.from_pretrained(location, **tf_kwargs)
             except Exception as e:
@@ -437,10 +438,10 @@ class HFTorchInferenceModel(HFInferenceModel):
             ):
                 device_map[key] = (
                     utils.koboldai_vars.gpu_device
-                    if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu
+                    if utils.koboldai_vars.hascuda and self.usegpu
                     else "cpu"
                     if not utils.koboldai_vars.hascuda
-                    or not utils.koboldai_vars.breakmodel
+                    or not self.breakmodel
                     else breakmodel.primary_device
                 )
             else:
@@ -456,12 +457,12 @@ class HFTorchInferenceModel(HFInferenceModel):
                 )
                 device = (
                     utils.koboldai_vars.gpu_device
-                    if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu
+                    if utils.koboldai_vars.hascuda and self.usegpu
                     else "disk"
                     if layer < disk_blocks and layer < ram_blocks
                     else "cpu"
                     if not utils.koboldai_vars.hascuda
-                    or not utils.koboldai_vars.breakmodel
+                    or not self.breakmodel
                     else "shared"
                     if layer < ram_blocks
                     else bisect.bisect_right(
@@ -566,15 +567,15 @@ class HFTorchInferenceModel(HFInferenceModel):
                     and breakmodel.primary_device != "cpu"
                     and utils.koboldai_vars.hascuda
                     and (
-                        utils.koboldai_vars.breakmodel
-                        or utils.koboldai_vars.usegpu
+                        self.breakmodel
+                        or self.usegpu
                     )
                     and model_dict[key].dtype is torch.float32
                 ):
                     model_dict[key] = model_dict[key].to(torch.float16)
                 if breakmodel.primary_device == "cpu" or (
-                    not utils.koboldai_vars.usegpu
-                    and not utils.koboldai_vars.breakmodel
+                    not self.usegpu
+                    and not self.breakmodel
                     and model_dict[key].dtype is torch.float16
                 ):
                     model_dict[key] = model_dict[key].to(torch.float32)
@@ -612,14 +613,14 @@ class HFTorchInferenceModel(HFInferenceModel):
                 and breakmodel.primary_device != "cpu"
                 and utils.koboldai_vars.hascuda
                 and (
-                    utils.koboldai_vars.breakmodel
-                    or utils.koboldai_vars.usegpu
+                    self.breakmodel
+                    or self.usegpu
                 )
             ):
                 dtype = torch.float16
             if breakmodel.primary_device == "cpu" or (
-                not utils.koboldai_vars.usegpu
-                and not utils.koboldai_vars.breakmodel
+                not self.usegpu
+                and not self.breakmodel
             ):
                 dtype = torch.float32
             if (
@@ -675,16 +676,16 @@ class HFTorchInferenceModel(HFInferenceModel):
                     and breakmodel.primary_device != "cpu"
                     and utils.koboldai_vars.hascuda
                     and (
-                        utils.koboldai_vars.breakmodel
-                        or utils.koboldai_vars.usegpu
+                        self.breakmodel
+                        or self.usegpu
                     )
                     and model_dict[key].dtype is torch.float32
                 ):
                     model_dict[key] = model_dict[key].to(torch.float16)
 
                 if breakmodel.primary_device == "cpu" or (
-                    not utils.koboldai_vars.usegpu
-                    and not utils.koboldai_vars.breakmodel
+                    not self.usegpu
+                    and not self.breakmodel
                     and model_dict[key].dtype is torch.float16
                 ):
                     model_dict[key] = model_dict[key].to(torch.float32)
@@ -723,14 +724,14 @@ class HFTorchInferenceModel(HFInferenceModel):
                 and breakmodel.primary_device != "cpu"
                 and utils.koboldai_vars.hascuda
                 and (
-                    utils.koboldai_vars.breakmodel
-                    or utils.koboldai_vars.usegpu
+                    self.breakmodel
+                    or self.usegpu
                 )
             ):
                 dtype = torch.float16
             if breakmodel.primary_device == "cpu" or (
-                not utils.koboldai_vars.usegpu
-                and not utils.koboldai_vars.breakmodel
+                not self.usegpu
+                and not self.breakmodel
             ):
                 dtype = torch.float32
             if (
@@ -764,7 +765,7 @@ class HFTorchInferenceModel(HFInferenceModel):
         if always_use or (
             utils.koboldai_vars.hascuda
             and self.low_mem
-            and (utils.koboldai_vars.usegpu or utils.koboldai_vars.breakmodel)
+            and (self.usegpu or self.breakmodel)
         ):
             original_dtype = torch.get_default_dtype()
             torch.set_default_dtype(torch.float16)
@@ -956,8 +957,9 @@ class HFTorchInferenceModel(HFInferenceModel):
                 -1,
                 utils.num_layers(config),
             ):
-                utils.koboldai_vars.breakmodel = False
-                utils.koboldai_vars.usegpu = True
+                logger.debug("All layers on same GPU. Breakmodel disabled")
+                self.breakmodel = False
+                self.usegpu = True
                 utils.koboldai_vars.gpu_device = len(breakmodel.gpu_blocks) - 1
                 return
@@ -966,6 +968,6 @@ class HFTorchInferenceModel(HFInferenceModel):
         import breakmodel
 
         breakmodel.primary_device = "cpu"
-        utils.koboldai_vars.breakmodel = False
-        utils.koboldai_vars.usegpu = False
+        self.breakmodel = False
+        self.usegpu = False
         return
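
Note (not part of the patch): every hunk above makes the same substitution,
replacing the global utils.koboldai_vars.usegpu / utils.koboldai_vars.breakmodel
flags with the per-instance self.usegpu / self.breakmodel attributes that the
hf.py hunks introduce. For reference, the chained conditional in the
@@ -456,12 +457,12 @@ hunk encodes the layer-placement precedence sketched
below. This is a minimal illustrative sketch only: the class and argument
names are assumptions, and the real code resolves the concrete GPU index with
bisect.bisect_right(breakmodel.gpu_blocks, layer - ram_blocks) rather than
returning a fixed device string.

    # Illustrative sketch; names other than usegpu/breakmodel are assumed.
    class PlacementSketch:
        def __init__(self, usegpu: bool, breakmodel: bool):
            self.usegpu = usegpu          # the patch's new instance flag
            self.breakmodel = breakmodel  # the patch's new instance flag

        def pick_device(self, has_cuda: bool, layer: int,
                        ram_blocks: int, disk_blocks: int) -> str:
            if has_cuda and self.usegpu:
                return "cuda:0"  # whole model on one GPU; breakmodel bypassed
            if layer < disk_blocks and layer < ram_blocks:
                return "disk"    # layer streamed from disk on demand
            if not has_cuda or not self.breakmodel:
                return "cpu"     # no CUDA, or layer splitting disabled
            if layer < ram_blocks:
                return "shared"  # layer held in shared system RAM
            # Otherwise a breakmodel GPU slot; the real code picks the index
            # via bisect over breakmodel.gpu_blocks.
            return "cuda:0"

The dtype hunks follow the same split: weights are cast to torch.float16 when
the primary device is a GPU and (self.breakmodel or self.usegpu) holds, and
kept in torch.float32 when everything stays on the CPU.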