Reimplement HF workaround only for llama

Henk
2023-07-22 16:59:49 +02:00
parent 8dd7b93a6c
commit 7a5d813b92


@@ -110,6 +110,11 @@ class model_backend(HFTorchInferenceModel):
                 # Also, lazy loader doesn't support GPT-2 models
                 self.lazy_load = False
 
+            if self.model_type == "llama":
+                tf_kwargs.update({
+                    "pretraining_tp": 1  # Workaround recommended by HF to fix their mistake in the config.json that tuners adopted
+                })
+
             logger.debug(
                 "lazy_load: {} hascuda: {} breakmodel: {} nobreakmode: {}".format(
                     self.lazy_load,
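
For context, a minimal sketch of the same workaround applied directly with the transformers library (the model path is a placeholder; pretraining_tp is the LlamaConfig field that some fine-tuned config.json files ship with a value above 1, which makes HF slice linear layers at inference time to emulate tensor-parallel pretraining math):

# Minimal sketch, assuming transformers >= 4.31; "path/to/llama" is a placeholder.
from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained("path/to/llama")
if getattr(config, "model_type", None) == "llama":
    # Pinning pretraining_tp to 1 restores the normal fused linear code path,
    # which HF recommends for inference.
    config.pretraining_tp = 1

model = AutoModelForCausalLM.from_pretrained("path/to/llama", config=config)

This mirrors what the backend does via tf_kwargs.update: the extra keyword ends up on the model config before the weights are loaded.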