Reimplement HF workaround only for llama

Henk
2023-07-22 16:59:49 +02:00
parent 8dd7b93a6c
commit 7a5d813b92


@@ -110,6 +110,11 @@ class model_backend(HFTorchInferenceModel):
                 # Also, lazy loader doesn't support GPT-2 models
                 self.lazy_load = False
 
+            if self.model_type == "llama":
+                tf_kwargs.update({
+                    "pretraining_tp": 1  # Workaround recommended by HF to fix their mistake in the config.json that tuners adopted
+                })
+
             logger.debug(
                 "lazy_load: {} hascuda: {} breakmodel: {} nobreakmode: {}".format(
                     self.lazy_load,
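
For context, a minimal sketch of the same workaround applied directly with the transformers library (the model path is a placeholder; pretraining_tp is the LlamaConfig field that some fine-tuned config.json files ship with a value above 1, which makes HF slice linear layers at inference time to emulate tensor-parallel pretraining math):

# Minimal sketch, assuming transformers >= 4.31; "path/to/llama" is a placeholder.
from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained("path/to/llama")
if getattr(config, "model_type", None) == "llama":
    # Pinning pretraining_tp to 1 restores the normal fused linear code path,
    # which HF recommends for inference.
    config.pretraining_tp = 1

model = AutoModelForCausalLM.from_pretrained("path/to/llama", config=config)

This mirrors what the backend does via tf_kwargs.update: the extra keyword ends up on the model config before the weights are loaded.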