Merge branch 'Model_Plugins' of https://github.com/ebolam/KoboldAI into Model_Plugins

Author: ebolam
Date:   2023-10-12 20:42:51 -04:00
8 changed files with 543 additions and 163 deletions

View File

@@ -1474,7 +1474,7 @@ def general_startup(override_args=None):
     parser.add_argument("--cacheonly", action='store_true', help="Does not save the model to the models folder when it has been downloaded in the cache")
     parser.add_argument("--customsettings", help="Preloads arguments from a JSON file. You only need to provide the location of the JSON file. Use the customsettings.json template file; it can be renamed so that you can store multiple configurations. Leave any settings you want at their defaults as null. Any values you wish to set need to be in double quotation marks")
     parser.add_argument("--no_ui", action='store_true', default=False, help="Disables the GUI and Socket.IO server while leaving the API server running.")
-    parser.add_argument("--summarizer_model", action='store', default="philschmid/bart-large-cnn-samsum", help="Huggingface model to use for summarization. Defaults to sshleifer/distilbart-cnn-12-6")
+    parser.add_argument("--summarizer_model", action='store', default="pszemraj/led-large-book-summary", help="Huggingface model to use for summarization. Defaults to pszemraj/led-large-book-summary")
     parser.add_argument("--max_summary_length", action='store', default=75, help="Maximum size for summary to send to image generation")
     parser.add_argument("--multi_story", action='store_true', default=False, help="Allow multi-story mode (experimental)")
     parser.add_argument("--peft", type=str, help="Specify the path or HuggingFace ID of a Peft to load it. Not supported on TPU. (Experimental)")
@@ -7650,16 +7650,19 @@ def get_items_locations_from_text(text):
 #==================================================================#
 def summarize(text, max_length=100, min_length=30, unload=True):
     from transformers import pipeline as summary_pipeline
+    from transformers import AutoConfig
     start_time = time.time()
     if koboldai_vars.summarizer is None:
         if os.path.exists("functional_models/{}".format(args.summarizer_model.replace('/', '_'))):
             koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
             koboldai_vars.summarizer = AutoModelForSeq2SeqLM.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
+            koboldai_vars.summary_model_config = AutoConfig.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
         else:
             koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained(args.summarizer_model, cache_dir="cache")
             koboldai_vars.summarizer = AutoModelForSeq2SeqLM.from_pretrained(args.summarizer_model, cache_dir="cache")
             koboldai_vars.summary_tokenizer.save_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")
             koboldai_vars.summarizer.save_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")
+            koboldai_vars.summary_model_config = AutoConfig.from_pretrained(args.summarizer_model, cache_dir="cache")
     #Try GPU accel
     if koboldai_vars.hascuda and torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved(0) >= 1645778560:
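
The load path above follows a load-or-cache pattern: use the snapshot under functional_models/ when present, otherwise pull from the Hugging Face Hub and persist a snapshot for next time. Distilled into a standalone helper (a sketch under those assumptions; load_summarizer is a hypothetical name, not KoboldAI API):

    import os
    from transformers import AutoConfig, AutoModelForSeq2SeqLM, AutoTokenizer

    def load_summarizer(model_id, root="functional_models"):
        local = os.path.join(root, model_id.replace("/", "_"))
        source = local if os.path.exists(local) else model_id
        tokenizer = AutoTokenizer.from_pretrained(source, cache_dir="cache")
        model = AutoModelForSeq2SeqLM.from_pretrained(source, cache_dir="cache")
        config = AutoConfig.from_pretrained(source, cache_dir="cache")
        if source == model_id:
            # First run: save a local snapshot so later runs skip the network.
            tokenizer.save_pretrained(local)
            model.save_pretrained(local, max_shard_size="500MiB")
        return tokenizer, model, config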
@@ -7673,9 +7676,27 @@ def summarize(text, max_length=100, min_length=30, unload=True):
     #Actual summarization
     start_time = time.time()
     #make sure the text fits in the model's input window, otherwise we'll crash
-    if len(koboldai_vars.summary_tokenizer.encode(text)) > 1000:
-        text = koboldai_vars.summary_tokenizer.decode(koboldai_vars.summary_tokenizer.encode(text)[:1000])
-    output = tpool.execute(summarizer, text, max_length=max_length, min_length=min_length, do_sample=False)[0]['summary_text']
+    max_tokens = koboldai_vars.summary_model_config.max_encoder_position_embeddings if hasattr(koboldai_vars.summary_model_config, 'max_encoder_position_embeddings') else 1024
+    logger.info("Using max summary tokens of {}".format(max_tokens))
+    if len(koboldai_vars.summary_tokenizer.encode(text)) > max_tokens:
+        text_list = koboldai_vars.actions.sentence_re.findall(text)
+        i = 0
+        while i <= len(text_list)-2:
+            if len(koboldai_vars.summary_tokenizer.encode(text_list[i] + text_list[i+1])) < max_tokens:
+                text_list[i] = text_list[i] + text_list[i+1]
+                del text_list[i+1]
+            else:
+                i += 1
+    else:
+        text_list = [text]
+    output = []
+    logger.info("Summarizing with {} chunks of length {}".format(len(text_list), [len(koboldai_vars.summary_tokenizer.encode(x)) for x in text_list]))
+    for text in text_list:
+        output.append(tpool.execute(summarizer, text, max_length=max_length, min_length=min_length, do_sample=False)[0]['summary_text'])
+    output = " ".join(output)
     logger.debug("Time to summarize: {}".format(time.time()-start_time))
     #move model back to CPU to save precious vram
     torch.cuda.empty_cache()
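
The truncate-to-1000-tokens shortcut above is replaced by chunked summarization: split the text into sentences, greedily merge adjacent sentences while the pair still fits, summarize each chunk, and join the partial summaries with spaces. The merge logic as a standalone function (a sketch; encode stands in for koboldai_vars.summary_tokenizer.encode, and the regex is a stand-in for koboldai_vars.actions.sentence_re, whose pattern this diff does not show):

    import re

    def chunk_for_summarizer(text, encode, max_tokens):
        # Short inputs pass through as a single chunk.
        if len(encode(text)) <= max_tokens:
            return [text]
        sentences = re.findall(r"[^.!?]+[.!?]+\s*", text) or [text]
        i = 0
        while i <= len(sentences) - 2:
            if len(encode(sentences[i] + sentences[i + 1])) < max_tokens:
                # Neighbors still fit together: merge and retry from here.
                sentences[i] = sentences[i] + sentences[i + 1]
                del sentences[i + 1]
            else:
                i += 1
        return sentences

One behavioral difference worth noting: a single sentence that alone exceeds max_tokens is no longer truncated; it is passed to the model as-is.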
@@ -7695,40 +7716,33 @@ def summarize(text, max_length=100, min_length=30, unload=True):
 @socketio.on("refresh_auto_memory")
 @logger.catch
 def UI_2_refresh_auto_memory(data):
+    max_output_length = 500
+    from transformers import AutoConfig
     koboldai_vars.auto_memory = "Generating..."
     if koboldai_vars.summary_tokenizer is None:
-        if os.path.exists("models/{}".format(args.summarizer_model.replace('/', '_'))):
-            koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
+        if os.path.exists("functional_models/{}".format(args.summarizer_model.replace('/', '_'))):
+            koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
+            koboldai_vars.summary_model_config = AutoConfig.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
         else:
             koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained(args.summarizer_model, cache_dir="cache")
-    #first, let's get all of our game text and split it into sentences
-    sentences = [x[0] for x in koboldai_vars.actions.to_sentences()]
-    sentences_lengths = [len(koboldai_vars.summary_tokenizer.encode(x)) for x in sentences]
+            koboldai_vars.summary_model_config = AutoConfig.from_pretrained(args.summarizer_model, cache_dir="cache")
+    max_tokens = koboldai_vars.summary_model_config.max_encoder_position_embeddings if hasattr(koboldai_vars.summary_model_config, 'max_encoder_position_embeddings') else 1024
+    #first, let's get all of our game text
+    sentences = "".join([x[0] for x in koboldai_vars.actions.to_sentences()])
     pass_number = 1
-    while len(koboldai_vars.summary_tokenizer.encode("".join(sentences))) > 1000:
-        #Now let's split them into 1000 token chunks
-        summary_chunks = [""]
-        summary_chunk_lengths = [0]
-        for i in range(len(sentences)):
-            if summary_chunk_lengths[-1] + sentences_lengths[i] <= 1000:
-                summary_chunks[-1] += sentences[i]
-                summary_chunk_lengths[-1] += sentences_lengths[i]
-            else:
-                summary_chunks.append(sentences[i])
-                summary_chunk_lengths.append(sentences_lengths[i])
-        new_sentences = []
-        i=0
-        for summary_chunk in summary_chunks:
-            logger.debug("summarizing chunk {}".format(i))
-            new_sentences.extend(re.split("(?<=[.!?])\s+", summarize(summary_chunk, unload=False)))
-            i+=1
+    while len(koboldai_vars.summary_tokenizer.encode(sentences)) > max_tokens:
+        new_sentences = summarize(sentences, unload=False, max_length=max_output_length)
         logger.debug("Pass {}:\nSummarized to {} characters from {}".format(pass_number, len(new_sentences), len(sentences)))
         sentences = new_sentences
-        koboldai_vars.auto_memory += "Pass {}:\n{}\n\n".format(pass_number, "\n".join(sentences))
+        koboldai_vars.auto_memory += "Pass {}:\n{}\n\n".format(pass_number, sentences)
         pass_number += 1
     logger.debug("OK, doing final summarization")
-    output = summarize(" ".join(sentences))
+    if len(koboldai_vars.summary_tokenizer.encode(sentences)) > max_output_length:
+        output = summarize(sentences, max_length=max_output_length)
+    else:
+        output = sentences
     koboldai_vars.auto_memory += "\n\nFinal Result:\n" + output

View File

@@ -33,6 +33,7 @@ dependencies:
   - flask-cloudflared==0.0.10
   - flask-ngrok
   - flask-cors
+  - Werkzeug==2.3.7
   - lupa==1.10
   - transformers[sentencepiece]==4.34.0
   - huggingface_hub==0.16.4
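
The same Werkzeug==2.3.7 pin is added to every environment and requirements file in this commit, presumably to keep the resolver from installing Werkzeug 3.0 (released shortly before this commit), which made breaking API changes that a Flask 2.3-era stack may not tolerate. A quick sanity check for a built environment (a sketch, not part of the commit):

    from importlib.metadata import version

    # Fails loudly if the resolver ignored the pin and installed Werkzeug 3.x.
    assert version("werkzeug") == "2.3.7", version("werkzeug")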

View File

@@ -33,6 +33,7 @@ dependencies:
   - flask-cloudflared==0.0.10
   - flask-ngrok
   - flask-cors
+  - Werkzeug==2.3.7
   - lupa==1.10
   - transformers[sentencepiece]==4.34.0
   - huggingface_hub==0.16.4

View File

@@ -29,6 +29,7 @@ dependencies:
   - flask-cloudflared==0.0.10
   - flask-ngrok
   - flask-cors
+  - Werkzeug==2.3.7
   - lupa==1.10
   - transformers[sentencepiece]==4.34.0
   - huggingface_hub==0.16.4

View File

@@ -1247,9 +1247,9 @@ class undefined_settings(settings):
 class system_settings(settings):
     local_only_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold',
                             'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', 'comregex_ui',
-                            'sp', '_horde_pid', 'inference_config', 'image_pipeline',
+                            'sp', '_horde_pid', 'inference_config', 'image_pipeline', 'summary_model_config',
                             'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui', 'colab_arg']
-    no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold',
+    no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', 'summary_model_config',
                          'lua_koboldcore', 'sp', 'sp_length', '_horde_pid', 'horde_share', 'aibusy',
                          'serverstarted', 'inference_config', 'image_pipeline', 'summarizer', 'on_colab',
                          'summary_tokenizer', 'use_colab_tpu', 'noai', 'disable_set_aibusy', 'cloudflare_link', 'tts_model',
@@ -1334,6 +1334,7 @@ class system_settings(settings):
         self.image_pipeline = None
         self.summarizer = None
         self.summary_tokenizer = None
+        self.summary_model_config = {}
         self.keep_img_gen_in_memory = False
         self.cookies = {} #cookies for colab since colab's URL changes, cookies are lost
         self.experimental_features = False
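
summary_model_config joins both exclusion lists because it holds a live transformers config object rather than JSON-friendly state. An illustrative reading of the pattern (a speculative sketch; the real settings base class is not shown in this diff):

    import json

    class SettingsSketch:
        no_save_variables = ['summarizer', 'summary_tokenizer', 'summary_model_config']

        def to_json(self):
            # Persist only plain state; live model/tokenizer/config objects
            # are rebuilt on demand rather than written to the save file.
            return json.dumps({k: v for k, v in vars(self).items()
                               if k not in self.no_save_variables})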

View File

@@ -4,6 +4,7 @@ optimum[onnxruntime]==1.13.2
 safetensors==0.3.3
 Flask==2.3.3
 Flask-SocketIO==5.3.2
+Werkzeug==2.3.7
 python-socketio==5.7.2
 requests
 torch == 2.0.*

View File

@@ -16,6 +16,7 @@ python-socketio==5.7.2
 flask-cloudflared==0.0.10
 flask-ngrok
 flask-cors
+Werkzeug==2.3.7
 eventlet==0.33.3
 dnspython==2.2.1
 lupa==1.10

File diff suppressed because one or more lines are too long