Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00)
Isolate OPT Tokenizer Fix to OPT models
Changed file: aiserver.py (13 lines changed)
@@ -108,15 +108,14 @@ def new_init(self, *args, **kwargs):
     self.ncols = 99
 tqdm.__init__ = new_init
 
-# Fix some issues with the OPT tokenizer
+# Add _koboldai_header support for some optional tokenizer fixes
+# This used to be an OPT tokenizer fix, this has been moved search for "# These are model specific overrides if a model has bad defaults" for the new section
 from transformers import PreTrainedTokenizerBase
 old_pretrainedtokenizerbase_from_pretrained = PreTrainedTokenizerBase.from_pretrained.__func__
 @classmethod
 def new_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs):
     tokenizer = old_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs)
-    tokenizer._koboldai_header = tokenizer.encode("")
-    tokenizer.add_bos_token = False
-    tokenizer.add_prefix_space = False
+    tokenizer._koboldai_header = []
     return tokenizer
 PreTrainedTokenizerBase.from_pretrained = new_pretrainedtokenizerbase_from_pretrained
 
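The hunk above keeps the from_pretrained monkey-patch but strips the OPT-specific settings out of it: every tokenizer now just gets an empty _koboldai_header by default, and the per-model settings move down into load_model (next hunk). Below is a minimal, self-contained sketch of the wrapping pattern, assuming only that the transformers library is installed; the _original_from_pretrained / _patched_from_pretrained names are illustrative, not from the codebase.

    # Sketch of the classmethod monkey-patch used in aiserver.py (illustrative names).
    from transformers import PreTrainedTokenizerBase

    # .__func__ unwraps the classmethod so the original can be called with an explicit cls.
    _original_from_pretrained = PreTrainedTokenizerBase.from_pretrained.__func__

    @classmethod
    def _patched_from_pretrained(cls, *args, **kwargs):
        # Load the tokenizer exactly as before...
        tokenizer = _original_from_pretrained(cls, *args, **kwargs)
        # ...then tag it with a neutral default header; model-specific code may overwrite it later.
        tokenizer._koboldai_header = []
        return tokenizer

    # Re-install the wrapper; every later from_pretrained call goes through it.
    PreTrainedTokenizerBase.from_pretrained = _patched_from_pretrained

After this runs, any tokenizer returned by from_pretrained carries a _koboldai_header attribute, so downstream code can rely on it existing.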
@@ -3251,10 +3250,14 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
             # koboldai_vars.badwordsids.append([vocab[key]])
 
         # These are model specific overrides if a model has bad defaults
+        tokenizer._koboldai_header = []
         if koboldai_vars.model_type == "llama":
             tokenizer.decode_with_prefix_space = True
             tokenizer.add_bos_token = False
+        if koboldai_vars.model_type == "opt":
+            tokenizer._koboldai_header = tokenizer.encode("")
+            tokenizer.add_bos_token = False
+            tokenizer.add_prefix_space = False
         logger.info(f"Pipeline created: {koboldai_vars.model}")
 
     else:
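This second hunk is where the old OPT behaviour now lives: the header and BOS/prefix-space tweaks are applied only when koboldai_vars.model_type is "opt", next to the existing llama overrides. A rough standalone sketch of that override logic follows, for illustration only; the helper name and function shape are hypothetical, and the comment about tokenizer.encode("") returning just the tokenizer's prepended special tokens (OPT's </s> marker) is inferred from how the header is used, not stated in the commit.

    # Hypothetical helper mirroring the overrides load_model applies after creating the tokenizer.
    def apply_model_specific_tokenizer_overrides(tokenizer, model_type):
        # Default: KoboldAI prepends no header tokens.
        tokenizer._koboldai_header = []
        if model_type == "llama":
            tokenizer.decode_with_prefix_space = True
            tokenizer.add_bos_token = False
        if model_type == "opt":
            # encode("") yields only the special tokens the tokenizer would prepend,
            # so they become a fixed header instead of being re-added on every encode call.
            tokenizer._koboldai_header = tokenizer.encode("")
            tokenizer.add_bos_token = False
            tokenizer.add_prefix_space = False
        return tokenizer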