mirror of https://github.com/KoboldAI/KoboldAI-Client.git
Hello its breaking breakmodel time
@@ -13,12 +13,6 @@ import modeling.lazy_loader as lazy_loader
 import koboldai_settings
 from logger import logger
 
-try:
-    import breakmodel
-except ModuleNotFoundError as e:
-    # Breakmodel is only expected to work on GPU
-    if not utils.koboldai_vars.use_colab_tpu:
-        raise e
 
 from modeling.inference_models.hf_torch import HFTorchInferenceModel
 
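The deleted lines followed the usual optional-dependency pattern: try the import, and only re-raise when the module is genuinely required on this platform. A minimal self-contained sketch of that pattern, assuming a generic helper name (optional_import is illustrative, not a KoboldAI function):

import importlib

# Hedged sketch of the guard this commit deletes: import a GPU-only
# module, tolerating its absence on platforms (e.g. TPU) that never use it.
def optional_import(name: str, required: bool):
    try:
        return importlib.import_module(name)
    except ModuleNotFoundError:
        if required:
            raise  # this platform is expected to have the backend
        return None  # optional here; callers must check for None

Usage in the spirit of the removed block would be something like optional_import("breakmodel", required=not use_colab_tpu).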
@@ -70,14 +64,6 @@ class model_backend(HFTorchInferenceModel):
         # If we're using torch_lazy_loader, we need to get breakmodel config
         # early so that it knows where to load the individual model tensors
         logger.debug("lazy_load: {} hascuda: {} breakmodel: {} nobreakmode: {}".format(self.lazy_load, utils.koboldai_vars.hascuda, self.breakmodel, self.nobreakmodel))
-        if (
-            self.lazy_load
-            and utils.koboldai_vars.hascuda
-            and self.breakmodel
-            and not self.nobreakmodel
-        ):
-            logger.debug("loading breakmodel")
-            self.breakmodel_device_config(self.model_config)
 
         if self.lazy_load:
             # If we're using lazy loader, we need to figure out what the model's hidden layers are called
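For reference, the removed gate configured breakmodel's device map early only when all four flags lined up, because the lazy loader needs device assignments before it materializes tensors. A hypothetical standalone predicate capturing the same logic (the helper name is ours, not KoboldAI's):

# Hypothetical helper mirroring the deleted condition.
def should_configure_breakmodel_early(lazy_load: bool, hascuda: bool,
                                      breakmodel_enabled: bool,
                                      nobreakmodel: bool) -> bool:
    # Early device configuration only matters when lazy-loading on CUDA
    # with breakmodel enabled and not explicitly disabled.
    return lazy_load and hascuda and breakmodel_enabled and not nobreakmodel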
@@ -141,7 +127,7 @@ class model_backend(HFTorchInferenceModel):
                 self.get_local_model_path(ignore_existance=True)
             )
 
-            if utils.koboldai_vars.fp32_model and not breakmodel.disk_blocks:
+            if utils.koboldai_vars.fp32_model:
                 # Use save_pretrained to convert fp32 models to fp16,
                 # unless we are using disk cache because save_pretrained
                 # is not supported in that case
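The one-line change widens when the fp16 re-save happens, since the disk-cache case (breakmodel.disk_blocks) is going away along with breakmodel itself. As background, the conversion the comment describes is a plain Hugging Face round-trip; a sketch with an illustrative model name and output path:

import torch
from transformers import AutoModelForCausalLM

# Illustrative fp32 -> fp16 conversion via save_pretrained, as the
# surrounding comment describes; model name and path are placeholders.
model = AutoModelForCausalLM.from_pretrained(
    "EleutherAI/gpt-neo-125m", torch_dtype=torch.float32
)
model = model.half()                   # cast weights to fp16 in memory
model.save_pretrained("model-fp16")    # re-save so future loads are fp16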
@@ -247,27 +233,6 @@ class model_backend(HFTorchInferenceModel):
                     shutil.rmtree("cache/")
 
         self.patch_embedding()
-
-
-        if utils.koboldai_vars.hascuda:
-            if self.usegpu:
-                # Use just VRAM
-                self.model = self.model.half().to(utils.koboldai_vars.gpu_device)
-            elif self.breakmodel:
-                # Use both RAM and VRAM (breakmodel)
-                if not self.lazy_load:
-                    self.breakmodel_device_config(self.model.config)
-                self._move_to_devices()
-            elif breakmodel.disk_blocks > 0:
-                # Use disk
-                self._move_to_devices()
-            else:
-                # Use CPU
-                self.model = self.model.to("cpu").float()
-        elif breakmodel.disk_blocks > 0:
-            self._move_to_devices()
-        else:
-            self.model = self.model.to("cpu").float()
 
 
         self.model.kai_model = self
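The big deletion is the old placement ladder: all-VRAM, hybrid RAM/VRAM via breakmodel, disk offload, or plain CPU. A simplified sketch of that decision tree in plain PyTorch; the layer-sharding step that _move_to_devices performed is reduced to a comment, and the flag names are stand-ins for the KoboldAI variables in the diff:

import torch

# Hedged sketch of the removed dispatch; use_gpu / use_breakmodel /
# disk_blocks stand in for the KoboldAI flags shown above.
def place_model(model: torch.nn.Module, use_gpu: bool,
                use_breakmodel: bool, disk_blocks: int) -> torch.nn.Module:
    if torch.cuda.is_available():
        if use_gpu:
            # Use just VRAM: halve precision and move everything to the GPU
            return model.half().to("cuda")
        if use_breakmodel or disk_blocks > 0:
            # Hybrid RAM/VRAM (or disk) split: the real code sharded layers
            # across devices here; the sharding itself is elided in this sketch
            return model
    # CPU fallback keeps full fp32 precision
    return model.to("cpu").float()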