Lazy loader can now use accelerate's init_empty_weights()

2025-06-05 21:59:24 +02:00 · 2022-06-16 18:56:16 -04:00
parent 5253cdcb36
commit 8bdf17f598
2 changed files with 29 additions and 20 deletions
--- a/aiserver.py
+++ b/aiserver.py
@@ -1788,7 +1788,7 @@ def load_model(use_gpu=True, gpu_layers=None, initial_load=False, online_model="
                    shutil.move(vars.model.replace('/', '_'), "models/{}".format(vars.model.replace('/', '_')))
                print("\n", flush=True)
                if(vars.lazy_load):  # If we're using lazy loader, we need to figure out what the model's hidden layers are called
-                    with torch_lazy_loader.use_lazy_torch_load(dematerialized_modules=True):
+                    with torch_lazy_loader.use_lazy_torch_load(dematerialized_modules=True, use_accelerate_init_empty_weights=True):
                        try:
                            metamodel = AutoModelForCausalLM.from_config(model_config)
                        except Exception as e: