From 21de36c4b03ca35242a10fc17b70be5d38f79d36 Mon Sep 17 00:00:00 2001
From: Gnome Ann <>
Date: Sun, 19 Jun 2022 16:44:23 -0400
Subject: [PATCH] Lazy loader now moves all non-layer weights to primary device

---
 aiserver.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aiserver.py b/aiserver.py
index c12fbd52..e3cc829b 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -1677,7 +1677,7 @@ def load_model(use_gpu=True, gpu_layers=None, initial_load=False, online_model="
 
         for key, value in model_dict.items():
             if isinstance(value, torch_lazy_loader.LazyTensor) and not any(key.startswith(n) or key.startswith(n.split(".", 1)[1]) for n in vars.layers_module_names):
-                device_map[key] = vars.gpu_device if vars.hascuda and vars.usegpu else "cpu"
+                device_map[key] = vars.gpu_device if vars.hascuda and vars.usegpu else "cpu" if not vars.hascuda or not vars.breakmodel else breakmodel.primary_device
             else:
                 layer = int(max((n for n in vars.layers_module_names if key.startswith(n) or key.startswith(n.split(".", 1)[1])), key=len).rsplit(".", 1)[1])
                 device = vars.gpu_device if vars.hascuda and vars.usegpu else "cpu" if not vars.hascuda or not vars.breakmodel else "shared" if layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks)
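
The patched line is a nested conditional expression, so its precedence can be hard to read at a glance. Below is a minimal sketch, not the project's code, of the device choice it makes for non-layer weights; the parameter names (has_cuda, use_gpu, use_breakmodel, gpu_device, primary_device) are hypothetical stand-ins for vars.hascuda, vars.usegpu, vars.breakmodel, vars.gpu_device and breakmodel.primary_device.

    # Minimal sketch of the device selection performed by the patched line
    # for weights that do not belong to a transformer layer. All names here
    # are stand-ins; they are not the identifiers used in aiserver.py.
    def non_layer_weight_device(has_cuda, use_gpu, use_breakmodel,
                                gpu_device, primary_device):
        if has_cuda and use_gpu:
            # Whole model runs on a single GPU, so non-layer weights go there too.
            return gpu_device
        if not has_cuda or not use_breakmodel:
            # No CUDA, or breakmodel (layer splitting) disabled: keep them on CPU.
            return "cpu"
        # Breakmodel is active: before this patch the expression fell through to
        # "cpu"; with the patch these weights follow breakmodel's primary device.
        return primary_device

This matches the commit subject: with breakmodel enabled, non-layer weights are now mapped to the primary device instead of staying on the CPU, while the per-layer branch (the else clause in the diff) is unchanged.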