Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00).
Commit: "Fix lazy-loading on 4-bit".
This commit is contained in:
@@ -412,14 +412,17 @@ class HFTorchInferenceModel(HFInferenceModel):
|
||||
|
||||
@functools.lru_cache(maxsize=None)
def get_original_key(key, module_names=None):
    """Resolve a (possibly shortened) state-dict key to the longest
    matching original module name.

    Args:
        key: State-dict key to resolve.
        module_names: Optional hashable iterable of candidate original
            names (e.g. a tuple); must be hashable because results are
            memoized via lru_cache. Defaults to ``utils.module_names``.

    Returns:
        The longest candidate that ends with ``key``, or ``key`` itself
        when no candidate matches.
    """
    candidates = utils.module_names if module_names is None else module_names
    # Longest suffix match wins: a fully-qualified name is preferred over
    # a shorter partial one. `default=key` makes the no-match case return
    # the key unchanged instead of raising ValueError on an empty iterable.
    return max(
        (
            original_key
            for original_key in candidates
            if original_key.endswith(key)
        ),
        key=len,
        default=key,
    )
|
||||
|
||||
for key, value in model_dict.items():
|
||||
original_key = get_original_key(key)
|
||||
|
@@ -104,8 +104,6 @@ class HFTorch4BitInferenceModel(HFTorchInferenceModel):
|
||||
)
|
||||
utils.koboldai_vars.model = self.model_name
|
||||
|
||||
self.lazy_load = False
|
||||
|
||||
self.init_model_config()
|
||||
|
||||
gpulayers = utils.args.breakmodel_gpulayers
|
||||
|
Reference in New Issue
Block a user