Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-02-26 08:27:46 +01:00)
Fix OPT tokenization problems
parent 49a3cf132e
commit f71bae254a

aiserver.py: 32 changed lines
@@ -67,6 +67,17 @@ def new_init(self, *args, **kwargs):
         self.ncols = 99
 tqdm.__init__ = new_init
 
+# Fix some issues with the OPT tokenizer
+from transformers import PreTrainedTokenizerBase
+old_pretrainedtokenizerbase_from_pretrained = PreTrainedTokenizerBase.from_pretrained.__func__
+@classmethod
+def new_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs):
+    tokenizer = old_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs)
+    tokenizer._koboldai_header = tokenizer.encode("")
+    tokenizer.add_bos_token = False
+    tokenizer.add_prefix_space = False
+    return tokenizer
+PreTrainedTokenizerBase.from_pretrained = new_pretrainedtokenizerbase_from_pretrained
+
 #==================================================================#
 # Variables & Storage
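Note (not part of the commit): the patch above works around the fact that OPT checkpoints ship a tokenizer that prepends a header token to every encode() call. The sketch below is illustrative only; it assumes transformers is installed and uses "facebook/opt-125m" as an example checkpoint, and the token ids shown are the usual OPT defaults rather than guaranteed values.

# Illustrative sketch of the behaviour the patch compensates for.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("facebook/opt-125m")

header = tok.encode("")        # OPT prepends a BOS token (</s>, usually id 2) to everything
print(header)                  # e.g. [2]
print(tok.encode("Hello"))     # e.g. [2, ...] -- the header repeats on every encode() call

# The patch caches that header once as tokenizer._koboldai_header and then sets
# add_bos_token / add_prefix_space to False, so separately encoded chunks
# (memory, world info, prompt, actions) can be concatenated without a duplicate
# header in front of each chunk.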
@@ -1697,6 +1708,9 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
                         # Then save the pytorch_model-#####-of-#####.bin files
                         for filename in filenames:
                             shutil.move(transformers.file_utils.get_from_cache(transformers.file_utils.hf_bucket_url(vars.model, filename, revision=vars.revision), cache_dir="cache", local_files_only=True), os.path.join("models/{}".format(vars.model.replace('/', '_')), filename))
+                    # If the model has a tokenizer_config.json, preserve the original file instead of using the one output by tokenizer.save_pretrained (using the file output by tokenizer.save_pretrained can break OPT-350M in transformers 4.20.0)
+                    if(os.path.isfile(os.path.join("models/{}".format(vars.model.replace('/', '_')), "tokenizer_config.json"))):
+                        shutil.move(transformers.file_utils.get_from_cache(transformers.file_utils.hf_bucket_url(vars.model, "tokenizer_config.json", revision=vars.revision), cache_dir="cache", local_files_only=True), os.path.join("models/{}".format(vars.model.replace('/', '_')), "tokenizer_config.json"))
                     shutil.rmtree("cache/")
 
         if(vars.hascuda):
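A standalone sketch of the preservation step above, for illustration only: it reuses the same transformers.file_utils helpers the diff relies on (current as of transformers 4.20, deprecated in later releases), and "facebook/opt-350m" stands in for vars.model.

# Sketch only, assuming the "cache/" and "models/" layout used elsewhere in aiserver.py.
import os
import shutil
import transformers

model_id = "facebook/opt-350m"                     # example; vars.model in aiserver.py
target_dir = "models/{}".format(model_id.replace('/', '_'))
config_path = os.path.join(target_dir, "tokenizer_config.json")

if os.path.isfile(config_path):
    # Overwrite the tokenizer_config.json written by tokenizer.save_pretrained()
    # with the original cached copy, since the regenerated file can break
    # OPT-350M under transformers 4.20.0.
    url = transformers.file_utils.hf_bucket_url(model_id, "tokenizer_config.json")
    cached = transformers.file_utils.get_from_cache(url, cache_dir="cache",
                                                    local_files_only=True)
    shutil.move(cached, config_path)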
@@ -3326,24 +3340,26 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
         global tokenizer
         tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
 
+    lnheader = len(tokenizer._koboldai_header)
+
     # Calculate token budget
     prompttkns = tokenizer.encode(utils.encodenewlines(vars.comregex_ai.sub('', vars.prompt)), max_length=int(2e9), truncation=True)
     lnprompt = len(prompttkns)
 
     memtokens = tokenizer.encode(utils.encodenewlines(mem), max_length=int(2e9), truncation=True)
     lnmem = len(memtokens)
-    if(lnmem > vars.max_length - lnsp - vars.genamt - budget_deduction):
+    if(lnmem > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
         raise OverflowError("The memory in your story is too long. Please either write a shorter memory text or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")
 
     witokens = tokenizer.encode(utils.encodenewlines(winfo), max_length=int(2e9), truncation=True)
     lnwi = len(witokens)
-    if(lnmem + lnwi > vars.max_length - lnsp - vars.genamt - budget_deduction):
+    if(lnmem + lnwi > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
         raise OverflowError("The current active world info keys take up too many tokens. Please either write shorter world info, decrease World Info Depth or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")
 
     if(anotetxt != ""):
         anotetkns = tokenizer.encode(utils.encodenewlines(anotetxt), max_length=int(2e9), truncation=True)
         lnanote = len(anotetkns)
-        if(lnmem + lnwi + lnanote > vars.max_length - lnsp - vars.genamt - budget_deduction):
+        if(lnmem + lnwi + lnanote > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
             raise OverflowError("The author's note in your story is too long. Please either write a shorter author's note or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")
 
     if(vars.useprompt):
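The new lnheader term changes the token-budget arithmetic: the header now counts against the context window alongside the soft prompt and the generation reserve. With hypothetical numbers (none of these values come from the commit):

# Hypothetical values, for illustration only.
max_length = 2048        # "Max Tokens" setting (vars.max_length)
genamt = 80              # tokens reserved for the generated continuation (vars.genamt)
lnsp = 20                # soft prompt length; 0 when no soft prompt is loaded
lnheader = 1             # len(tokenizer._koboldai_header); 1 for OPT's </s> header
budget_deduction = 0

budget = max_length - lnheader - lnsp - genamt - budget_deduction
print(budget)            # 1947 tokens left for memory, world info, author's note and actions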
@@ -3354,14 +3370,14 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
     lnsubmission = len(tokenizer.encode(utils.encodenewlines(vars.comregex_ai.sub('', submission)), max_length=int(2e9), truncation=True)) if submission is not None else 0
     maybe_lnprompt = lnprompt if vars.useprompt and actionlen > 0 else 0
 
-    if(lnmem + lnwi + lnanote + maybe_lnprompt + lnsubmission > vars.max_length - lnsp - vars.genamt - budget_deduction):
+    if(lnmem + lnwi + lnanote + maybe_lnprompt + lnsubmission > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
         raise OverflowError("Your submission is too long. Please either write a shorter submission or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt. If you are using the Always Add Prompt setting, turning it off may help.")
 
     assert budget >= 0
 
     if(actionlen == 0):
         # First/Prompt action
-        tokens = memtokens + witokens + anotetkns + prompttkns
+        tokens = tokenizer._koboldai_header + memtokens + witokens + anotetkns + prompttkns
         assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction
         ln = len(tokens) + lnsp
         return tokens, ln+1, ln+vars.genamt
@@ -3409,12 +3425,12 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
         # Did we get to add the A.N.? If not, do it here
         if(anotetxt != ""):
             if((not anoteadded) or forceanote):
-                tokens = memtokens + witokens + anotetkns + prompttkns + tokens
+                tokens = tokenizer._koboldai_header + memtokens + witokens + anotetkns + prompttkns + tokens
             else:
-                tokens = memtokens + witokens + prompttkns + tokens
+                tokens = tokenizer._koboldai_header + memtokens + witokens + prompttkns + tokens
         else:
             # Prepend Memory, WI, and Prompt before action tokens
-            tokens = memtokens + witokens + prompttkns + tokens
+            tokens = tokenizer._koboldai_header + memtokens + witokens + prompttkns + tokens
 
         # Send completed bundle to generator
         assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction
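Taken together, the assembly changes mean the cached header is contributed exactly once, at the very front of the generated context, rather than once per encoded chunk. A minimal sketch with made-up token ids:

# Made-up token ids, for illustration only.
header     = [2]                  # tokenizer._koboldai_header (</s> for OPT)
memtokens  = [10, 11]
witokens   = [12]
anotetkns  = [13]
prompttkns = [14, 15]
tokens     = [16, 17, 18]         # most recent action tokens

context = header + memtokens + witokens + anotetkns + prompttkns + tokens
assert context[:1] == header      # the header appears once, only at the start
print(context)                    # [2, 10, 11, 12, 13, 14, 15, 16, 17, 18]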
|
Loading…
x
Reference in New Issue
Block a user