Merge branch 'main' into united-merge

Gnome Ann 2022-06-18 13:39:23 -04:00
commit 0eedc541c8
2 changed files with 45 additions and 8 deletions

View File

@@ -80,6 +80,17 @@ def new_init(self, *args, **kwargs):
         self.ncols = 99
 tqdm.__init__ = new_init
 
+# Fix some issues with the OPT tokenizer
+from transformers import PreTrainedTokenizerBase
+old_pretrainedtokenizerbase_from_pretrained = PreTrainedTokenizerBase.from_pretrained.__func__
+@classmethod
+def new_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs):
+    tokenizer = old_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs)
+    tokenizer._koboldai_header = tokenizer.encode("")
+    tokenizer.add_bos_token = False
+    tokenizer.add_prefix_space = False
+    return tokenizer
+PreTrainedTokenizerBase.from_pretrained = new_pretrainedtokenizerbase_from_pretrained
 
 #==================================================================#
 # Variables & Storage
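The hunk above monkey-patches PreTrainedTokenizerBase.from_pretrained so every tokenizer KoboldAI loads records the tokens it emits for an empty string (its "header", e.g. OPT's </s>) in tokenizer._koboldai_header and stops adding them automatically. A standalone sketch of that idea, using facebook/opt-125m purely as an illustrative checkpoint:

# Illustrative only -- the checkpoint name is an assumption, not something the diff pins down.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m", use_fast=False)
header = tokenizer.encode("")       # OPT's slow tokenizer prepends </s>, so this is typically [2]
tokenizer.add_bos_token = False     # keep later encode() calls from re-adding the header
print(header, tokenizer.encode("")) # e.g. [2] []  -> the header is now tracked separately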
@@ -1807,6 +1818,10 @@ def load_model(use_gpu=True, gpu_layers=None, initial_load=False, online_model="
         if(os.path.isdir(vars.custmodpth)):
             try:
                 tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
+            except Exception as e:
+                pass
+            try:
+                tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", use_fast=False)
             except Exception as e:
                 try:
                     tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
@@ -1821,6 +1836,10 @@ def load_model(use_gpu=True, gpu_layers=None, initial_load=False, online_model="
         elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))):
             try:
                 tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
+            except Exception as e:
+                pass
+            try:
+                tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", use_fast=False)
             except Exception as e:
                 try:
                     tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
@@ -1848,6 +1867,10 @@ def load_model(use_gpu=True, gpu_layers=None, initial_load=False, online_model="
             try:
                 tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
+            except Exception as e:
+                pass
+            try:
+                tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", use_fast=False)
             except Exception as e:
                 try:
                     tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
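The three hunks above apply the same change to each tokenizer-loading branch of load_model: if the default (fast) AutoTokenizer fails, retry with use_fast=False before falling back to GPT2TokenizerFast. A condensed sketch of that fallback order (the helper name is hypothetical; the real code inlines these try/except blocks and continues the chain beyond what the hunks show):

from transformers import AutoTokenizer, GPT2TokenizerFast

def load_tokenizer_with_fallbacks(path, revision=None):
    # 1. Preferred: the fast AutoTokenizer
    try:
        return AutoTokenizer.from_pretrained(path, revision=revision, cache_dir="cache")
    except Exception:
        pass
    # 2. New in this commit: the slow tokenizer, which behaves better for OPT-style models
    try:
        return AutoTokenizer.from_pretrained(path, revision=revision, cache_dir="cache", use_fast=False)
    except Exception:
        # 3. Last resort shown in the hunks: treat it as a GPT-2 style tokenizer
        return GPT2TokenizerFast.from_pretrained(path, revision=revision, cache_dir="cache")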
@@ -3603,24 +3626,26 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
         global tokenizer
         tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
 
+    lnheader = len(tokenizer._koboldai_header)
+
     # Calculate token budget
     prompttkns = tokenizer.encode(utils.encodenewlines(vars.comregex_ai.sub('', vars.prompt)), max_length=int(2e9), truncation=True)
     lnprompt = len(prompttkns)
 
     memtokens = tokenizer.encode(utils.encodenewlines(mem), max_length=int(2e9), truncation=True)
     lnmem = len(memtokens)
-    if(lnmem > vars.max_length - lnsp - vars.genamt - budget_deduction):
+    if(lnmem > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
         raise OverflowError("The memory in your story is too long. Please either write a shorter memory text or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")
 
     witokens = tokenizer.encode(utils.encodenewlines(winfo), max_length=int(2e9), truncation=True)
     lnwi = len(witokens)
-    if(lnmem + lnwi > vars.max_length - lnsp - vars.genamt - budget_deduction):
+    if(lnmem + lnwi > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
         raise OverflowError("The current active world info keys take up too many tokens. Please either write shorter world info, decrease World Info Depth or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")
 
     if(anotetxt != ""):
         anotetkns = tokenizer.encode(utils.encodenewlines(anotetxt), max_length=int(2e9), truncation=True)
         lnanote = len(anotetkns)
-        if(lnmem + lnwi + lnanote > vars.max_length - lnsp - vars.genamt - budget_deduction):
+        if(lnmem + lnwi + lnanote > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
             raise OverflowError("The author's note in your story is too long. Please either write a shorter author's note or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")
 
     if(vars.useprompt):
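With lnheader subtracted, the header tokens captured by the patched tokenizer now count against the context window in these overflow checks. A back-of-the-envelope example with made-up numbers (none of these values come from the diff):

max_length, genamt, lnsp, budget_deduction = 2048, 80, 20, 0   # stand-ins for the vars.* settings
lnheader = 1                         # e.g. a single </s> header token for an OPT model
lnmem, lnwi, lnanote = 300, 150, 40  # token counts for memory, world info and author's note

available = max_length - lnheader - lnsp - genamt - budget_deduction
print(available)                           # 1947 tokens left for story content
print(lnmem + lnwi + lnanote > available)  # False -> none of the OverflowErrors fire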
@@ -3631,14 +3656,14 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
     lnsubmission = len(tokenizer.encode(utils.encodenewlines(vars.comregex_ai.sub('', submission)), max_length=int(2e9), truncation=True)) if submission is not None else 0
     maybe_lnprompt = lnprompt if vars.useprompt and actionlen > 0 else 0
-    if(lnmem + lnwi + lnanote + maybe_lnprompt + lnsubmission > vars.max_length - lnsp - vars.genamt - budget_deduction):
+    if(lnmem + lnwi + lnanote + maybe_lnprompt + lnsubmission > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
         raise OverflowError("Your submission is too long. Please either write a shorter submission or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt. If you are using the Always Add Prompt setting, turning it off may help.")
 
     assert budget >= 0
 
     if(actionlen == 0):
         # First/Prompt action
-        tokens = memtokens + witokens + anotetkns + prompttkns
+        tokens = tokenizer._koboldai_header + memtokens + witokens + anotetkns + prompttkns
         assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction
         ln = len(tokens) + lnsp
         return tokens, ln+1, ln+vars.genamt
@@ -3686,12 +3711,12 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
         # Did we get to add the A.N.? If not, do it here
         if(anotetxt != ""):
             if((not anoteadded) or forceanote):
-                tokens = memtokens + witokens + anotetkns + prompttkns + tokens
+                tokens = tokenizer._koboldai_header + memtokens + witokens + anotetkns + prompttkns + tokens
             else:
-                tokens = memtokens + witokens + prompttkns + tokens
+                tokens = tokenizer._koboldai_header + memtokens + witokens + prompttkns + tokens
         else:
             # Prepend Memory, WI, and Prompt before action tokens
-            tokens = memtokens + witokens + prompttkns + tokens
+            tokens = tokenizer._koboldai_header + memtokens + witokens + prompttkns + tokens
 
         # Send completed bundle to generator
         assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction
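The last two hunks prepend tokenizer._koboldai_header exactly once, at the very front of every bundle sent to the generator, so models that expect a leading special token (such as OPT's </s>) still receive it. A toy sketch of that assembly order with placeholder token ids:

header = [2]                          # tokenizer._koboldai_header, e.g. </s> for OPT
memtokens, witokens = [10, 11], [20]  # memory and world info
anotetkns, prompttkns = [30], [40]    # author's note and prompt
action_tokens = [50, 51]              # most recent story actions

tokens = header + memtokens + witokens + anotetkns + prompttkns + action_tokens
print(tokens)                         # the header appears once, before everything else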

View File

@@ -1324,6 +1324,10 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
     if(os.path.isdir(vars.custmodpth)):
         try:
             tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
+        except Exception as e:
+            pass
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", use_fast=False)
         except Exception as e:
             try:
                 tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
@@ -1336,6 +1340,10 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
     elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))):
         try:
             tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
+        except Exception as e:
+            pass
+        try:
+            tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", use_fast=False)
         except Exception as e:
             try:
                 tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
@@ -1348,6 +1356,10 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
     else:
         try:
             tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
+        except Exception as e:
+            pass
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", use_fast=False)
         except Exception as e:
             try:
                 tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
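The TPU backend receives the identical use_fast=False fallback, so this code path resolves tokenizers the same way as the main backend. A quick way to see which class each attempt produces (the checkpoint name is only an illustrative example, not something the diff references):

from transformers import AutoTokenizer

fast = AutoTokenizer.from_pretrained("facebook/opt-125m")                  # first attempt
slow = AutoTokenizer.from_pretrained("facebook/opt-125m", use_fast=False)  # new fallback
print(type(fast).__name__, type(slow).__name__)  # e.g. GPT2TokenizerFast vs. GPT2Tokenizer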