From b81f61b8209c54d1325ff9a0803d01b62f226f38 Mon Sep 17 00:00:00 2001
From: somebody
Date: Wed, 21 Jun 2023 18:35:56 -0500
Subject: [PATCH] Clean debug

---
 modeling/inference_models/hf_torch.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index a10a48f3..6bcd88cd 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -110,6 +110,9 @@ class HFTorchInferenceModel(HFInferenceModel):
         self.breakmodel_config.gpu_blocks = self.layers
         self.breakmodel_config.disk_blocks = self.disk_layers
 
+        # HACK: Prevent get_auxiliary_device from returning cuda
+        utils.koboldai_vars.hascuda = self.usegpu
+
         return ret
 
     def _get_target_dtype(self) -> Union[torch.float16, torch.float32]:
@@ -117,6 +120,8 @@
             return torch.float32
         elif utils.args.cpu:
             return torch.float32
+        elif not self.usegpu:
+            return torch.float32
         return torch.float16
 
     def _apply_warpers(
@@ -316,11 +321,6 @@ class HFTorchInferenceModel(HFInferenceModel):
 
         # Try to determine model type from either AutoModel or falling back to legacy
        try:
-            print(f"self.lazy_load {self.lazy_load}")
-            print(f"self.breakmodel {self.breakmodel}")
-            print(f"self.nobreakmodel {self.nobreakmodel}")
-            print(f"args.cpu {utils.args.cpu}")
-
             if self.lazy_load:
                 with lazy_loader.use_lazy_load(dematerialized_modules=True):
                     metamodel = AutoModelForCausalLM.from_config(self.model_config)
@@ -344,6 +344,13 @@
                 **tf_kwargs,
             )
 
+            if not self.lazy_load:
+                # We need to move the model to the desired device
+                if (not self.usegpu) or torch.cuda.device_count() <= 0:
+                    model = model.to("cpu")
+                else:
+                    model = model.to("cuda")
+
             return model
         except Exception as e:
             traceback_string = traceback.format_exc().lower()