Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00)
Test Summarization
aiserver.py (121 lines changed)
@@ -8379,7 +8379,6 @@ def UI_2_generate_image(data):
    #If we have > 4 keys, use those otherwise use sumarization
    if len(keys) < 4:
        from transformers import pipeline as summary_pipeline
        start_time = time.time()
        #text to summarize:
        if len(koboldai_vars.actions) < 5:
@@ -8389,41 +8388,7 @@ def UI_2_generate_image(data):
        if koboldai_vars.summarizer is None:
            if os.path.exists("models/{}".format(args.summarizer_model.replace('/', '_'))):
                koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
                koboldai_vars.summarizer = AutoModelForSeq2SeqLM.from_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
            else:
                koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained(args.summarizer_model, cache_dir="cache")
                koboldai_vars.summarizer = AutoModelForSeq2SeqLM.from_pretrained(args.summarizer_model, cache_dir="cache")
                koboldai_vars.summary_tokenizer.save_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")
                koboldai_vars.summarizer.save_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")

        #Try GPU accel
        if koboldai_vars.hascuda and torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved(0) >= 1645778560:
            koboldai_vars.summarizer.to(0)
            device=0
        else:
            device=-1
        summarizer = tpool.execute(summary_pipeline, task="summarization", model=koboldai_vars.summarizer, tokenizer=koboldai_vars.summary_tokenizer, device=device)
        logger.debug("Time to load summarizer: {}".format(time.time()-start_time))

        #Actual sumarization
        start_time = time.time()
        global old_transfomers_functions
        temp = transformers.generation_utils.GenerationMixin._get_stopping_criteria
        transformers.generation_utils.GenerationMixin._get_stopping_criteria = old_transfomers_functions['transformers.generation_utils.GenerationMixin._get_stopping_criteria']
        keys = [tpool.execute(summarizer, text, max_length=args.max_summary_length, min_length=30, do_sample=False)[0]['summary_text']]
        transformers.generation_utils.GenerationMixin._get_stopping_criteria = temp
        logger.debug("Time to summarize: {}".format(time.time()-start_time))
        #move model back to CPU to save precious vram
        torch.cuda.empty_cache()
        logger.debug("VRAM used by summarization: {}".format(torch.cuda.memory_reserved(0)))
        koboldai_vars.summarizer.to("cpu")
        torch.cuda.empty_cache()

        logger.debug("Original Text: {}".format(text))
        logger.debug("Summarized Text: {}".format(keys[0]))
        keys = [summarize(text, max_length=args.max_summary_length)]

    art_guide = 'fantasy illustration, artstation, by jason felix by steve argyle by tyler jacobson by peter mohrbacher, cinematic lighting',
@@ -8571,9 +8536,93 @@ def get_items_locations_from_text(text):
    print("Locations: {}".format(loc))
    print("People: {}".format(per))

#==================================================================#
# summarizer
#==================================================================#
def summarize(text, max_length=100, min_length=30):
    from transformers import pipeline as summary_pipeline
    start_time = time.time()
    if koboldai_vars.summarizer is None:
        if os.path.exists("models/{}".format(args.summarizer_model.replace('/', '_'))):
            koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
            koboldai_vars.summarizer = AutoModelForSeq2SeqLM.from_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
        else:
            koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained(args.summarizer_model, cache_dir="cache")
            koboldai_vars.summarizer = AutoModelForSeq2SeqLM.from_pretrained(args.summarizer_model, cache_dir="cache")
            koboldai_vars.summary_tokenizer.save_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")
            koboldai_vars.summarizer.save_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")

    #Try GPU accel
    if koboldai_vars.hascuda and torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved(0) >= 1645778560:
        koboldai_vars.summarizer.to(0)
        device=0
    else:
        device=-1
    summarizer = tpool.execute(summary_pipeline, task="summarization", model=koboldai_vars.summarizer, tokenizer=koboldai_vars.summary_tokenizer, device=device)
    logger.debug("Time to load summarizer: {}".format(time.time()-start_time))

    #Actual sumarization
    start_time = time.time()
    global old_transfomers_functions
    temp = transformers.generation_utils.GenerationMixin._get_stopping_criteria
    transformers.generation_utils.GenerationMixin._get_stopping_criteria = old_transfomers_functions['transformers.generation_utils.GenerationMixin._get_stopping_criteria']
    #make sure text is less than 1024 tokens, otherwise we'll crash
    if len(koboldai_vars.summary_tokenizer.encode(text)) > 1000:
        text = koboldai_vars.summary_tokenizer.decode(koboldai_vars.summary_tokenizer.encode(text)[:1000])
    output = tpool.execute(summarizer, text, max_length=max_length, min_length=min_length, do_sample=False)[0]['summary_text']
    transformers.generation_utils.GenerationMixin._get_stopping_criteria = temp
    logger.debug("Time to summarize: {}".format(time.time()-start_time))
    #move model back to CPU to save precious vram
    torch.cuda.empty_cache()
    logger.debug("VRAM used by summarization: {}".format(torch.cuda.memory_reserved(0)))
    koboldai_vars.summarizer.to("cpu")
    torch.cuda.empty_cache()

    logger.debug("Original Text: {}".format(text))
    logger.debug("Summarized Text: {}".format(output))

    return output
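For reference, here is a minimal standalone sketch of the load → truncate → summarize → free-VRAM flow that summarize() implements above. The model name and the plain pipeline() call (instead of tpool.execute and the stopping-criteria swap) are illustrative assumptions, not the repository's configuration or the value of args.summarizer_model.

    # A hedged, self-contained sketch of the summarization pattern used by summarize().
    import torch
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

    model_name = "facebook/bart-large-cnn"  # hypothetical choice, not args.summarizer_model
    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir="cache")
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name, cache_dir="cache")

    device = 0 if torch.cuda.is_available() else -1  # GPU if present, else CPU
    summarizer = pipeline("summarization", model=model, tokenizer=tokenizer, device=device)

    def quick_summarize(text, max_length=100, min_length=30):
        # Keep the input under ~1000 tokens so a 1024-token encoder doesn't overflow
        token_ids = tokenizer.encode(text)
        if len(token_ids) > 1000:
            text = tokenizer.decode(token_ids[:1000])
        return summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)[0]["summary_text"]

    print(quick_summarize("Some long passage of story text ..."))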
#==================================================================#
# Test
#==================================================================#
@app.route("/summarize")
def request_summarize():
    if koboldai_vars.summary_tokenizer is None:
        koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
    #first, let's get all of our game text and split it into sentences
    sentences = [x[0] for x in koboldai_vars.actions.to_sentences()]
    sentences_lengths = [len(koboldai_vars.summary_tokenizer.encode(x)) for x in sentences]

    while len(koboldai_vars.summary_tokenizer.encode("".join(sentences))) > 1000:
        #Now let's split them into 1000 token chunks
        summary_chunks = [""]
        summary_chunk_lengths = [0]
        for i in range(len(sentences)):
            if summary_chunk_lengths[-1] + sentences_lengths[i] <= 1000:
                summary_chunks[-1] += sentences[i]
                summary_chunk_lengths[-1] += sentences_lengths[i]
            else:
                summary_chunks.append(sentences[i])
                summary_chunk_lengths.append(sentences_lengths[i])
        new_sentences = []
        i=0
        for summary_chunk in summary_chunks:
            print("summarizing chunk {}".format(i))
            new_sentences.extend(re.split("(?<=[.!?])\s+", summarize(summary_chunk)))
            i+=1
        print("Summarized to {} sentencees from {}".format(len(new_sentences), len(sentences)))
        sentences = new_sentences
    print("OK, doing final summarization")
    output = summarize(" ".join(sentences))
    print(output)
    return "Input tokens: {}\nOutput tokens: {}\n{}".format(len(koboldai_vars.summary_tokenizer.encode(request.args['text'])),
                                                            len(koboldai_vars.summary_tokenizer.encode(output)),
                                                            output)

@app.route("/vars")
@logger.catch
def show_vars():
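A hedged way to exercise the /summarize test route above once the server is running; the host/port and the text value are assumptions (the route reads request.args['text'] only to report its token count, while the summary itself always comes from the current story actions).

    # Hypothetical client call against a locally running server (host/port assumed).
    import requests

    resp = requests.get(
        "http://localhost:5000/summarize",
        params={"text": "Any text; only its token count is echoed back."},
    )
    print(resp.text)  # "Input tokens: ...\nOutput tokens: ...\n<summary of the current story>"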
@@ -153,47 +153,7 @@ class koboldai_vars(object):
                text += wi_text

        #we're going to split our actions by sentence for better context. We'll add in which actions the sentence covers. Prompt will be added at a -1 ID
        actions = {i: self.actions[i] for i in range(len(self.actions))}
        actions[-1] = self.prompt
        action_text = str(self.actions)
        action_text = "{}{}".format(self.prompt, action_text)
        ###########action_text_split = [sentence, actions used in sentence, token length, included in AI context]################
        action_text_split = [[x+" ", [], 0 if self.tokenizer is None else len(self.tokenizer.encode(x+" ")), False] for x in re.split("(?<=[.!?])\s+", action_text)]
        #The last action shouldn't have the extra space from the sentence splitting, so let's remove it
        action_text_split[-1][0] = action_text_split[-1][0][:-1]
        action_text_split[-1][2] = 0 if self.tokenizer is None else len(self.tokenizer.encode(action_text_split[-1][0]))

        Action_Position = [-1, len(actions[-1])] #First element is the action item, second is how much text is left
        Sentence_Position = [0, len(action_text_split[0][0])]
        while True:
            advance_action = False
            advance_sentence = False
            if Action_Position[1] <= Sentence_Position[1]:
                #We have enough text in the sentence to completely cover the action. Advance it to the next action
                advance_action = True
            if Sentence_Position[1] <= Action_Position[1]:
                advance_sentence = True
            if Action_Position[0] not in action_text_split[Sentence_Position[0]][1]:
                #Since this action is in the sentence, add it to the list if it's not already there
                action_text_split[Sentence_Position[0]][1].append(Action_Position[0])
            #Fix the text length leftovers first since they interact with each other
            if not advance_action:
                Action_Position[1] -= Sentence_Position[1]
            if not advance_sentence:
                Sentence_Position[1] -= Action_Position[1]

            if advance_action:
                Action_Position[0] += 1
                if Action_Position[0] >= max(actions):
                    break
                Action_Position[1] = len(actions[Action_Position[0]])
            if advance_sentence:
                Sentence_Position[0] += 1
                if Sentence_Position[0] >= len(action_text_split):
                    break
                Sentence_Position[1] = len(action_text_split[Sentence_Position[0]][0])
        #OK, action_text_split now contains a list of [sentence including trailing space if needed, [action IDs that sentence includes]]
        action_text_split = self.actions.to_sentences()

        #Add prompt lenght/text if we're set to always use prompt
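For orientation, a hedged sketch (not code from this commit) of how a caller could walk the [sentence, action IDs, token length, included-in-AI-context] entries that to_sentences() returns, marking the newest sentences for inclusion until a token budget runs out; the function name and the budget policy are illustrative only.

    def mark_for_context(action_text_split, token_budget):
        # Walk sentences newest-first and flip the "included in AI context" flag
        # while the budget allows (illustrative consumer, not calc_ai_text itself).
        used = 0
        for entry in reversed(action_text_split):
            sentence, action_ids, token_length, _included = entry
            if used + token_length > token_budget:
                break
            entry[3] = True
            used += token_length
        return action_text_split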
@@ -1339,6 +1299,50 @@ class KoboldStoryRegister(object):
                self.actions[action_id]["Options"][option_number]['Probabilities'].append(probabilities)
                process_variable_changes(self.socketio, "story", 'actions', {"id": action_id, 'action': self.actions[action_id]}, None)

    def to_sentences(self):
        #we're going to split our actions by sentence for better context. We'll add in which actions the sentence covers. Prompt will be added at a -1 ID
        actions = {i: self.actions[i] for i in range(len(self.actions))}
        actions[-1] = self.story_settings.prompt
        action_text = self.__str__()
        action_text = "{}{}".format(self.story_settings.prompt, action_text)
        ###########action_text_split = [sentence, actions used in sentence, token length, included in AI context]################
        action_text_split = [[x+" ", [], 0 if self.tokenizer is None else len(self.tokenizer.encode(x+" ")), False] for x in re.split("(?<=[.!?])\s+", action_text)]
        #The last action shouldn't have the extra space from the sentence splitting, so let's remove it
        action_text_split[-1][0] = action_text_split[-1][0][:-1]
        action_text_split[-1][2] = 0 if self.tokenizer is None else len(self.tokenizer.encode(action_text_split[-1][0]))

        Action_Position = [-1, len(actions[-1])] #First element is the action item, second is how much text is left
        Sentence_Position = [0, len(action_text_split[0][0])]
        while True:
            advance_action = False
            advance_sentence = False
            if Action_Position[1] <= Sentence_Position[1]:
                #We have enough text in the sentence to completely cover the action. Advance it to the next action
                advance_action = True
            if Sentence_Position[1] <= Action_Position[1]:
                advance_sentence = True
            if Action_Position[0] not in action_text_split[Sentence_Position[0]][1]:
                #Since this action is in the sentence, add it to the list if it's not already there
                action_text_split[Sentence_Position[0]][1].append(Action_Position[0])
            #Fix the text length leftovers first since they interact with each other
            if not advance_action:
                Action_Position[1] -= Sentence_Position[1]
            if not advance_sentence:
                Sentence_Position[1] -= Action_Position[1]

            if advance_action:
                Action_Position[0] += 1
                if Action_Position[0] >= max(actions):
                    break
                Action_Position[1] = len(actions[Action_Position[0]])
            if advance_sentence:
                Sentence_Position[0] += 1
                if Sentence_Position[0] >= len(action_text_split):
                    break
                Sentence_Position[1] = len(action_text_split[Sentence_Position[0]][0])
        #OK, action_text_split now contains a list of [sentence including trailing space if needed, [action IDs that sentence includes]]
        return action_text_split
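As an illustration of the alignment idea in to_sentences() — walk the concatenated story sentence by sentence and record which action IDs contribute characters to each sentence — here is a self-contained toy version. The function name, the simplified bookkeeping, and the sample data are assumptions for illustration, not code from this commit.

    # Toy sketch of attributing action IDs to sentences by consuming character counts.
    import re

    def align_actions_to_sentences(actions):
        # actions: dict of action ID -> text chunk, with the prompt stored at ID -1
        full_text = "".join(actions[i] for i in sorted(actions))
        sentences = [s + " " for s in re.split(r"(?<=[.!?])\s+", full_text)]
        sentences[-1] = sentences[-1][:-1]  # the last sentence keeps no artificial trailing space

        result = [[sentence, []] for sentence in sentences]
        action_ids = sorted(actions)
        a = 0                                   # index into action_ids
        a_left = len(actions[action_ids[a]])    # unconsumed characters of the current action
        for entry in result:
            s_left = len(entry[0])
            while s_left > 0 and a < len(action_ids):
                if action_ids[a] not in entry[1]:
                    entry[1].append(action_ids[a])
                consumed = min(s_left, a_left)
                s_left -= consumed
                a_left -= consumed
                if a_left == 0:                 # current action fully covered, move on
                    a += 1
                    if a < len(action_ids):
                        a_left = len(actions[action_ids[a]])
        return result

    # The second sentence spans the prompt (-1) and both actions (0 and 1):
    print(align_actions_to_sentences({-1: "You wake up. You see a ", 0: "door and ", 1: "open it."}))
    # [['You wake up. ', [-1]], ['You see a door and open it.', [-1, 0, 1]]]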
    def __setattr__(self, name, value):
        new_variable = name not in self.__dict__
        old_value = getattr(self, name, None)