diff --git a/aiserver.py b/aiserver.py
index cbb15dc0..b2d22e56 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -3779,7 +3779,7 @@ def calcsubmit(txt):
                     bias[i] = b["multiplier"]
 
-            device = model.get_auxilary_device()
+            device = utils.get_auxilary_device()
             attention_bias.attention_bias = torch.Tensor(bias).to(device)
             logger.info(f"Bias by {koboldai_vars.memory_attn_bias} -- {attention_bias.attention_bias}")
             logger.debug("Submit: experimental_features time {}s".format(time.time()-start_time))
 
diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index 8d06ff6e..a10a48f3 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -46,9 +46,14 @@ class BreakmodelConfig:
 
     def __init__(self) -> None:
         self.disk_blocks = 0
         self.gpu_blocks = []
+        self.primary_device = 0 if torch.cuda.device_count() > 0 else "cpu"
 
     def get_device_map(self, model: nn.Module) -> dict:
+        # HACK
+        if utils.args.cpu:
+            self.primary_device = "cpu"
+
         ram_blocks = len(utils.layers_module_names) - sum(self.gpu_blocks)
         cumulative_gpu_blocks = tuple(itertools.accumulate(self.gpu_blocks))
         device_map = {}
@@ -311,10 +316,21 @@ class HFTorchInferenceModel(HFInferenceModel):
 
         # Try to determine model type from either AutoModel or falling back to legacy
         try:
+            print(f"self.lazy_load {self.lazy_load}")
+            print(f"self.breakmodel {self.breakmodel}")
+            print(f"self.nobreakmodel {self.nobreakmodel}")
+            print(f"args.cpu {utils.args.cpu}")
+
             if self.lazy_load:
                 with lazy_loader.use_lazy_load(dematerialized_modules=True):
                     metamodel = AutoModelForCausalLM.from_config(self.model_config)
-                    tf_kwargs["device_map"] = self.breakmodel_config.get_device_map(metamodel)
+                    if utils.args.cpu:
+                        cpu_map = {name: "cpu" for name in utils.layers_module_names}
+                        for name in utils.get_missing_module_names(metamodel, list(cpu_map.keys())):
+                            cpu_map[name] = "cpu"
+                        tf_kwargs["device_map"] = cpu_map
+                    else:
+                        tf_kwargs["device_map"] = self.breakmodel_config.get_device_map(metamodel)
 
             with lazy_loader.use_lazy_load(
                 enable=self.lazy_load,
diff --git a/utils.py b/utils.py
index 863bda2f..0d30f194 100644
--- a/utils.py
+++ b/utils.py
@@ -655,10 +655,13 @@ def get_auxilary_device():
 
     # NOTE: TPU isn't a torch device, so TPU stuff gets sent to CPU.
     if koboldai_vars.hascuda and koboldai_vars.usegpu:
+        print("GP")
         return koboldai_vars.gpu_device
     elif koboldai_vars.hascuda:
         # TODO: Primary device
+        print("CUDA")
         return "cuda"
+    print("CPU")
     return "cpu"
 
 #==================================================================#
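
For readers of this patch: the sketch below is not part of the diff and not KoboldAI's actual implementation. It is a minimal illustration of the two device-map strategies the hf_torch.py hunks switch between: the breakmodel-style split that get_device_map builds from gpu_blocks, and the new --cpu branch that pins every layer module to "cpu". The function sketch_device_map and the transformer.h.* layer names are hypothetical stand-ins; the real code reads names from utils.layers_module_names and backfills non-layer modules with utils.get_missing_module_names.

```python
import itertools

def sketch_device_map(layer_names, gpu_blocks, force_cpu=False):
    """Toy breakmodel-style device map (illustrative only)."""
    if force_cpu:
        # Mirrors the new --cpu branch: every module is pinned to the CPU.
        return {name: "cpu" for name in layer_names}

    # Layers not covered by any GPU block stay in CPU RAM.
    ram_blocks = len(layer_names) - sum(gpu_blocks)
    cumulative = tuple(itertools.accumulate(gpu_blocks))

    device_map = {}
    for i, name in enumerate(layer_names):
        if i < ram_blocks:
            device_map[name] = "cpu"
        else:
            # First GPU whose cumulative block count covers this layer.
            device_map[name] = next(
                gpu for gpu, blocks in enumerate(cumulative)
                if i - ram_blocks < blocks
            )
    return device_map

# 6 layers, 4 of them on GPU 0, the first 2 left in CPU RAM:
layers = [f"transformer.h.{i}" for i in range(6)]
print(sketch_device_map(layers, gpu_blocks=[4]))
# {'transformer.h.0': 'cpu', 'transformer.h.1': 'cpu', 'transformer.h.2': 0, ...}
print(sketch_device_map(layers, gpu_blocks=[4], force_cpu=True))
# every module -> 'cpu'
```

A dict of this shape is what ends up in tf_kwargs["device_map"] and is ultimately passed to AutoModelForCausalLM.from_pretrained, which Transformers (via Accelerate) uses to decide where each module's weights are placed at load time.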