From 9eaa2aba47dda877fdf2a120bfbff9b1aa7e70ce Mon Sep 17 00:00:00 2001
From: Henk
Date: Tue, 25 Apr 2023 22:49:56 +0200
Subject: [PATCH] Isolate OPT Tokenizer Fix to OPT models

---
 aiserver.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 9dd621ef..7e198dee 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -108,15 +108,14 @@ def new_init(self, *args, **kwargs):
     self.ncols = 99
 tqdm.__init__ = new_init
 
-# Fix some issues with the OPT tokenizer
+# Add _koboldai_header support for some optional tokenizer fixes
+# This used to be an OPT tokenizer fix; it has been moved. Search for "# These are model specific overrides if a model has bad defaults" for the new section
 from transformers import PreTrainedTokenizerBase
 old_pretrainedtokenizerbase_from_pretrained = PreTrainedTokenizerBase.from_pretrained.__func__
 @classmethod
 def new_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs):
     tokenizer = old_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs)
-    tokenizer._koboldai_header = tokenizer.encode("")
-    tokenizer.add_bos_token = False
-    tokenizer.add_prefix_space = False
+    tokenizer._koboldai_header = []
     return tokenizer
 PreTrainedTokenizerBase.from_pretrained = new_pretrainedtokenizerbase_from_pretrained
 
@@ -3251,10 +3250,14 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
                             # koboldai_vars.badwordsids.append([vocab[key]])
 
                 # These are model specific overrides if a model has bad defaults
+                tokenizer._koboldai_header = []
                 if koboldai_vars.model_type == "llama":
                     tokenizer.decode_with_prefix_space = True
                     tokenizer.add_bos_token = False
-
+                if koboldai_vars.model_type == "opt":
+                    tokenizer._koboldai_header = tokenizer.encode("")
+                    tokenizer.add_bos_token = False
+                    tokenizer.add_prefix_space = False
                 logger.info(f"Pipeline created: {koboldai_vars.model}")
             else:
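
For context (not part of the patch): below is a minimal sketch of how the wrapped from_pretrained and the per-model overrides fit together after this change. It is a simplified illustration, not KoboldAI's actual layout; the helper name apply_model_overrides is hypothetical, since aiserver.py applies these overrides inline in load_model.

# Minimal sketch, not part of the patch. apply_model_overrides is a
# hypothetical helper mirroring the "model specific overrides" block.
from transformers import PreTrainedTokenizerBase

# Keep a handle on the original (unwrapped) classmethod implementation.
_old_from_pretrained = PreTrainedTokenizerBase.from_pretrained.__func__

@classmethod
def _patched_from_pretrained(cls, *args, **kwargs):
    tokenizer = _old_from_pretrained(cls, *args, **kwargs)
    # Every tokenizer now starts with an empty header; model-specific
    # code may replace it later.
    tokenizer._koboldai_header = []
    return tokenizer

PreTrainedTokenizerBase.from_pretrained = _patched_from_pretrained

def apply_model_overrides(tokenizer, model_type):
    if model_type == "llama":
        tokenizer.decode_with_prefix_space = True
        tokenizer.add_bos_token = False
    if model_type == "opt":
        # Capture the tokens the OPT tokenizer prepends to every encode()
        # call as the header, then stop it from inserting them automatically.
        tokenizer._koboldai_header = tokenizer.encode("")
        tokenizer.add_bos_token = False
        tokenizer.add_prefix_space = False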