Mirror of https://github.com/KoboldAI/KoboldAI-Client.git, synced 2025-06-05 21:59:24 +02:00.
Fix for breakmodel loading to CPU when set to GPU
This commit is contained in:
@@ -248,11 +248,12 @@ class model_backend(HFTorchInferenceModel):
         self.patch_embedding()

         if utils.koboldai_vars.hascuda:
-            if utils.koboldai_vars.usegpu:
+            if self.usegpu:
                 # Use just VRAM
                 self.model = self.model.half().to(utils.koboldai_vars.gpu_device)
-            elif utils.koboldai_vars.breakmodel:
+            elif self.breakmodel:
                 # Use both RAM and VRAM (breakmodel)
                 if not self.lazy_load:
                     self.breakmodel_device_config(self.model.config)
@@ -267,7 +268,8 @@ class model_backend(HFTorchInferenceModel):
                 self._move_to_devices()
             else:
                 self.model = self.model.to("cpu").float()

         self.model.kai_model = self
         utils.koboldai_vars.modeldim = self.get_hidden_size()

(Note: the hunk headers indicate one additional inserted line per hunk that was lost in extraction and is not reconstructed here.)
Reference in New Issue
Block a user