From f40236c04a69663834e04e7b39e4f5d86193d741 Mon Sep 17 00:00:00 2001
From: Henk <henk@henk.tech>
Date: Fri, 25 Aug 2023 14:27:44 +0200
Subject: [PATCH] Use the modern (non-legacy) llama tokenizer behavior

---
 modeling/inference_models/hf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py
index e50d87ff..7b005c9e 100644
--- a/modeling/inference_models/hf.py
+++ b/modeling/inference_models/hf.py
@@ -234,7 +234,7 @@ class HFInferenceModel(InferenceModel):
         if self.model_type == "llama":
             # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer
             self.tokenizer.add_bos_token = False
-
+            self.tokenizer.legacy = False
             # HF transformers no longer supports decode_with_prefix_space
             # We work around this by wrapping decode, encode, and __call__
             # with versions that work around the 'prefix space' misfeature