Model: Respect model lazyload over kaivars

kaivars dictates the model config unless it's loaded from outside aiserver.
This commit is contained in:
somebody
2023-03-09 20:29:12 -06:00
parent a472bdf6c3
commit 3646aa9e83
4 changed files with 13 additions and 9 deletions

View File

@@ -508,10 +508,13 @@ class HFTorchInferenceModel(HFInferenceModel):
**tf_kwargs,
)
except Exception as e:
print("Fell back for model due to", e)
if "out of memory" in traceback.format_exc().lower():
raise RuntimeError(
"One of your GPUs ran out of memory when KoboldAI tried to load your model."
)
return GPTNeoForCausalLM.from_pretrained(
location,
revision=utils.koboldai_vars.revision,