Fall back to unpatched HF

Author: somebody
Date:   2023-07-08 14:36:45 -05:00
Commit: 3928d86339
Parent: c2ee30af32
3 changed files with 61 additions and 34 deletions


@@ -364,7 +364,7 @@ class HFTorchInferenceModel(HFInferenceModel):
         except Exception as e:
             logger.warning(f"{self.model_name} is a no-go; {e} - Falling back to auto.")
             if utils.args.panic:
-                raise e
+                raise
 
         # Try to determine model type from either AutoModel or falling back to legacy
         try:
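
Both `raise e` → bare `raise` changes in this commit hinge on the same Python detail: inside an `except` block, a bare `raise` re-raises the active exception with its traceback unchanged, while `raise e` appends the re-raise line itself as the newest traceback entry, obscuring where the error actually started. A minimal sketch (the `fail` helper is hypothetical, not from this codebase):

def fail():
    raise ValueError("boom")  # hypothetical stand-in for the failing loader call

try:
    fail()
except Exception as e:
    print(f"{fail.__name__} is a no-go; {e}")
    # Bare `raise` propagates the active exception as-is, so the reported
    # traceback still points into fail(); `raise e` would add this line
    # as the most recent frame.
    raise
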
@@ -383,11 +383,28 @@ class HFTorchInferenceModel(HFInferenceModel):
                 metamodel
             )
-        with lazy_loader.use_lazy_load(
-            enable=self.lazy_load,
-            # DO NOT DEMATERIALIZE MODULES / INIT WEIGHTS EMPTY!!! IT WILL EXPLODE!!!!!!!
-            dematerialized_modules=False,
-        ):
+        try:
+            # Try to load with the lazyloader first...
+            with lazy_loader.use_lazy_load(
+                enable=self.lazy_load,
+                # DO NOT DEMATERIALIZE MODULES / INIT WEIGHTS EMPTY!!! IT WILL EXPLODE!!!!!!!
+                dematerialized_modules=False,
+            ):
+                model = AutoModelForCausalLM.from_pretrained(
+                    location,
+                    offload_folder="accelerate-disk-cache",
+                    torch_dtype=self._get_target_dtype(),
+                    **tf_kwargs,
+                )
+        except Exception as e:
+            # ...but fall back to stock HF if lazyloader fails.
+            if utils.args.panic:
+                raise
+            logger.error("Lazyloader failed, falling back to stock HF load. You may run out of RAM here. Details:")
+            logger.error(e)
+            logger.error(traceback.format_exc())
+            logger.info("Falling back to stock HF load...")
             model = AutoModelForCausalLM.from_pretrained(
                 location,
                 offload_folder="accelerate-disk-cache",
                 torch_dtype=self._get_target_dtype(),
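
The hunk above boils down to a try-first, fall-back-second control flow: attempt the lazy-loader path, and unless `--panic` is set, catch any failure and retry with a plain Hugging Face `from_pretrained`. A condensed sketch of that flow, with hypothetical `load_lazily`/`load_stock` stand-ins for the two paths (not the commit's API):

def load_lazily(location):
    # Hypothetical stand-in for the lazy-loader path.
    raise RuntimeError("lazyloader failed")

def load_stock(location):
    # Hypothetical stand-in for the unpatched HF from_pretrained path.
    return f"<model from {location}>"

def load_model(location, panic=False):
    try:
        return load_lazily(location)       # fast, low-RAM path first
    except Exception as exc:
        if panic:
            raise                          # --panic: surface the real error
        print(f"Lazyloader failed ({exc}); falling back to stock HF load")
        return load_stock(location)        # may need far more RAM

print(load_model("models/some-model"))  # falls back and returns the stock result
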
@@ -417,7 +434,7 @@ class HFTorchInferenceModel(HFInferenceModel):
                 raise
             if utils.args.panic:
-                raise e
+                raise
             logger.warning(f"Fell back to GPT2LMHeadModel due to {e}")
             logger.debug(traceback.format_exc())