Enhancements to the auto-memory test. The output seems more coherent.

ebolam
2023-10-06 09:40:41 -04:00
parent 4d793bfa59
commit 88d4dc88b9
2 changed files with 45 additions and 30 deletions

View File

@@ -1474,7 +1474,7 @@ def general_startup(override_args=None):
parser.add_argument("--cacheonly", action='store_true', help="Does not save the model to the models folder when it has been downloaded in the cache") parser.add_argument("--cacheonly", action='store_true', help="Does not save the model to the models folder when it has been downloaded in the cache")
parser.add_argument("--customsettings", help="Preloads arguements from json file. You only need to provide the location of the json file. Use customsettings.json template file. It can be renamed if you wish so that you can store multiple configurations. Leave any settings you want as default as null. Any values you wish to set need to be in double quotation marks") parser.add_argument("--customsettings", help="Preloads arguements from json file. You only need to provide the location of the json file. Use customsettings.json template file. It can be renamed if you wish so that you can store multiple configurations. Leave any settings you want as default as null. Any values you wish to set need to be in double quotation marks")
parser.add_argument("--no_ui", action='store_true', default=False, help="Disables the GUI and Socket.IO server while leaving the API server running.") parser.add_argument("--no_ui", action='store_true', default=False, help="Disables the GUI and Socket.IO server while leaving the API server running.")
parser.add_argument("--summarizer_model", action='store', default="philschmid/bart-large-cnn-samsum", help="Huggingface model to use for summarization. Defaults to sshleifer/distilbart-cnn-12-6") parser.add_argument("--summarizer_model", action='store', default="pszemraj/led-large-book-summary", help="Huggingface model to use for summarization. Defaults to pszemraj/led-large-book-summary")
parser.add_argument("--max_summary_length", action='store', default=75, help="Maximum size for summary to send to image generation") parser.add_argument("--max_summary_length", action='store', default=75, help="Maximum size for summary to send to image generation")
parser.add_argument("--multi_story", action='store_true', default=False, help="Allow multi-story mode (experimental)") parser.add_argument("--multi_story", action='store_true', default=False, help="Allow multi-story mode (experimental)")
parser.add_argument("--peft", type=str, help="Specify the path or HuggingFace ID of a Peft to load it. Not supported on TPU. (Experimental)") parser.add_argument("--peft", type=str, help="Specify the path or HuggingFace ID of a Peft to load it. Not supported on TPU. (Experimental)")
@@ -7637,16 +7637,19 @@ def get_items_locations_from_text(text):
 #==================================================================#
 def summarize(text, max_length=100, min_length=30, unload=True):
     from transformers import pipeline as summary_pipeline
+    from transformers import AutoConfig
     start_time = time.time()
     if koboldai_vars.summarizer is None:
         if os.path.exists("functional_models/{}".format(args.summarizer_model.replace('/', '_'))):
             koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
             koboldai_vars.summarizer = AutoModelForSeq2SeqLM.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
+            koboldai_vars.summary_model_config = AutoConfig.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
         else:
             koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained(args.summarizer_model, cache_dir="cache")
             koboldai_vars.summarizer = AutoModelForSeq2SeqLM.from_pretrained(args.summarizer_model, cache_dir="cache")
             koboldai_vars.summary_tokenizer.save_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")
             koboldai_vars.summarizer.save_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")
+            koboldai_vars.summary_model_config = AutoConfig.from_pretrained(args.summarizer_model, cache_dir="cache")
     #Try GPU accel
     if koboldai_vars.hascuda and torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved(0) >= 1645778560:
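
For reference, the GPU check above compares free VRAM headroom against a hard-coded byte threshold (1645778560, roughly 1.5 GiB). A standalone sketch of the same heuristic; the constant is taken from the diff, the helper name is ours:

    import torch

    SUMMARIZER_VRAM_BYTES = 1645778560  # threshold used in the diff, ~1.5 GiB

    def can_fit_on_gpu(device=0, needed=SUMMARIZER_VRAM_BYTES):
        # Headroom = total VRAM minus what torch has already reserved.
        if not torch.cuda.is_available():
            return False
        props = torch.cuda.get_device_properties(device)
        return props.total_memory - torch.cuda.memory_reserved(device) >= needed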
@@ -7660,9 +7663,27 @@ def summarize(text, max_length=100, min_length=30, unload=True):
     #Actual sumarization
     start_time = time.time()
     #make sure text is less than 1024 tokens, otherwise we'll crash
-    if len(koboldai_vars.summary_tokenizer.encode(text)) > 1000:
-        text = koboldai_vars.summary_tokenizer.decode(koboldai_vars.summary_tokenizer.encode(text)[:1000])
-    output = tpool.execute(summarizer, text, max_length=max_length, min_length=min_length, do_sample=False)[0]['summary_text']
+    max_tokens = koboldai_vars.summary_model_config.max_encoder_position_embeddings if hasattr(koboldai_vars.summary_model_config, 'max_encoder_position_embeddings') else 1024
+    logger.info("Using max summary tokens of {}".format(max_tokens))
+    if len(koboldai_vars.summary_tokenizer.encode(text)) > max_tokens:
+        text_list = koboldai_vars.actions.sentence_re.findall(text)
+        i=0
+        while i <= len(text_list)-2:
+            if len(koboldai_vars.summary_tokenizer.encode(text_list[i] + text_list[i+1])) < max_tokens:
+                text_list[i] = text_list[i] + text_list[i+1]
+                del text_list[i+1]
+            else:
+                i+=1
+    else:
+        text_list = [text]
+    output = []
+    logger.info("Summarizing with {} chunks of length {}".format(len(text_list), [len(koboldai_vars.summary_tokenizer.encode(x)) for x in text_list]))
+    for text in text_list:
+        output.append(tpool.execute(summarizer, text, max_length=max_length, min_length=min_length, do_sample=False)[0]['summary_text'])
+    output = " ".join(output)
     logger.debug("Time to summarize: {}".format(time.time()-start_time))
     #move model back to CPU to save precious vram
     torch.cuda.empty_cache()
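
The chunking added in this hunk is a greedy merge: walk the sentence list and join each sentence into its neighbour while the pair still fits under the encoder's token budget, otherwise leave a chunk boundary. The same idea as a self-contained sketch, with a whitespace token count standing in for the real tokenizer and sentence splitter:

    def merge_into_chunks(sentences, max_tokens, count_tokens=lambda s: len(s.split())):
        # Merge neighbours while the merged pair still fits the budget;
        # otherwise advance, leaving a chunk boundary behind.
        chunks = list(sentences)
        i = 0
        while i <= len(chunks) - 2:
            if count_tokens(chunks[i] + chunks[i + 1]) < max_tokens:
                chunks[i] = chunks[i] + chunks[i + 1]
                del chunks[i + 1]
            else:
                i += 1
        return chunks

    # merge_into_chunks(["a b. ", "c d. ", "e f. "], max_tokens=5) -> ["a b. c d. ", "e f. "]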
@@ -7682,40 +7703,33 @@ def summarize(text, max_length=100, min_length=30, unload=True):
 @socketio.on("refresh_auto_memory")
 @logger.catch
 def UI_2_refresh_auto_memory(data):
+    max_output_length=500
+    from transformers import AutoConfig
     koboldai_vars.auto_memory = "Generating..."
     if koboldai_vars.summary_tokenizer is None:
-        if os.path.exists("models/{}".format(args.summarizer_model.replace('/', '_'))):
-            koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
+        if os.path.exists("functional_models/{}".format(args.summarizer_model.replace('/', '_'))):
+            koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
+            koboldai_vars.summary_model_config = AutoConfig.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
         else:
             koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained(args.summarizer_model, cache_dir="cache")
-    #first, let's get all of our game text and split it into sentences
-    sentences = [x[0] for x in koboldai_vars.actions.to_sentences()]
-    sentences_lengths = [len(koboldai_vars.summary_tokenizer.encode(x)) for x in sentences]
+            koboldai_vars.summary_model_config = AutoConfig.from_pretrained(args.summarizer_model, cache_dir="cache")
+    max_tokens = koboldai_vars.summary_model_config.max_encoder_position_embeddings if hasattr(koboldai_vars.summary_model_config, 'max_encoder_position_embeddings') else 1024
+    #first, let's get all of our game text
+    sentences = "".join([x[0] for x in koboldai_vars.actions.to_sentences()])
     pass_number = 1
-    while len(koboldai_vars.summary_tokenizer.encode("".join(sentences))) > 1000:
-        #Now let's split them into 1000 token chunks
-        summary_chunks = [""]
-        summary_chunk_lengths = [0]
-        for i in range(len(sentences)):
-            if summary_chunk_lengths[-1] + sentences_lengths[i] <= 1000:
-                summary_chunks[-1] += sentences[i]
-                summary_chunk_lengths[-1] += sentences_lengths[i]
-            else:
-                summary_chunks.append(sentences[i])
-                summary_chunk_lengths.append(sentences_lengths[i])
-        new_sentences = []
-        i=0
-        for summary_chunk in summary_chunks:
-            logger.debug("summarizing chunk {}".format(i))
-            new_sentences.extend(re.split("(?<=[.!?])\s+", summarize(summary_chunk, unload=False)))
-            i+=1
+    while len(koboldai_vars.summary_tokenizer.encode(sentences)) > max_tokens:
+        new_sentences = summarize(sentences, unload=False, max_length=max_output_length)
         logger.debug("Pass {}:\nSummarized to {} sentencees from {}".format(pass_number, len(new_sentences), len(sentences)))
         sentences = new_sentences
-        koboldai_vars.auto_memory += "Pass {}:\n{}\n\n".format(pass_number, "\n".join(sentences))
+        koboldai_vars.auto_memory += "Pass {}:\n{}\n\n".format(pass_number, sentences)
         pass_number+=1
     logger.debug("OK, doing final summarization")
-    output = summarize(" ".join(sentences))
+    if len(koboldai_vars.summary_tokenizer.encode(sentences)) > max_output_length:
+        output = summarize(sentences, max_length=max_output_length)
+    else:
+        output = sentences
     koboldai_vars.auto_memory += "\n\n Final Result:\n" + output
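
With chunking now handled inside summarize(), the handler reduces to a fixed-point loop: re-summarize the whole story until it fits the encoder window, then summarize once more only if the result still exceeds the desired memory size. The control flow as a sketch; summarize_fn and count_tokens stand in for the real summarizer and tokenizer:

    def condense(text, count_tokens, summarize_fn, max_tokens=1024, max_output_length=500):
        # Keep taking whole-text summarization passes until the text
        # fits the encoder window.
        while count_tokens(text) > max_tokens:
            text = summarize_fn(text, max_length=max_output_length)
        # One final pass only if the survivor is still longer than the
        # target memory length.
        if count_tokens(text) > max_output_length:
            text = summarize_fn(text, max_length=max_output_length)
        return text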

View File

@@ -1247,9 +1247,9 @@ class undefined_settings(settings):
 class system_settings(settings):
     local_only_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold',
                             'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', 'comregex_ui',
-                            'sp', '_horde_pid', 'inference_config', 'image_pipeline',
+                            'sp', '_horde_pid', 'inference_config', 'image_pipeline', 'summary_model_config',
                             'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui', 'colab_arg']
-    no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold',
+    no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', 'summary_model_config',
                          'lua_koboldcore', 'sp', 'sp_length', '_horde_pid', 'horde_share', 'aibusy',
                          'serverstarted', 'inference_config', 'image_pipeline', 'summarizer', 'on_colab'
                          'summary_tokenizer', 'use_colab_tpu', 'noai', 'disable_set_aibusy', 'cloudflare_link', 'tts_model',
@@ -1334,6 +1334,7 @@ class system_settings(settings):
         self.image_pipeline = None
         self.summarizer = None
         self.summary_tokenizer = None
+        self.summary_model_config = {}
         self.keep_img_gen_in_memory = False
         self.cookies = {} #cookies for colab since colab's URL changes, cookies are lost
         self.experimental_features = False
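
Listing summary_model_config in local_only_variables and no_save_variables keeps the live transformers config object out of save files and out of state synced to clients; it is cheap to rebuild from AutoConfig on load. A toy illustration of how such an exclusion list is typically applied at serialization time (the class and to_json helper here are hypothetical; only the attribute names come from the diff):

    import json

    class SettingsLike:
        no_save_variables = ['summarizer', 'summary_tokenizer', 'summary_model_config']

        def __init__(self):
            self.summarizer = object()      # live model object, not serializable
            self.summary_model_config = {}  # rebuilt from AutoConfig on load
            self.auto_memory = ""           # plain data, safe to persist

        def to_json(self):
            # Drop anything in no_save_variables before dumping to disk.
            return json.dumps({k: v for k, v in self.__dict__.items()
                               if k not in self.no_save_variables})

    print(SettingsLike().to_json())  # only auto_memory survives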