From 6012d73b12dcd51bca675df73dd6607ffe0a41fb Mon Sep 17 00:00:00 2001
From: ebolam
Date: Thu, 22 Sep 2022 20:04:33 -0400
Subject: [PATCH] Test Summarization

---
 aiserver.py          | 121 ++++++++++++++++++++++++++++++-------
 koboldai_settings.py |  86 +++++++++++++++---------------
 2 files changed, 130 insertions(+), 77 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 0e27bc08..d63da4b7 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -8379,7 +8379,6 @@ def UI_2_generate_image(data):
     #If we have > 4 keys, use those otherwise use sumarization
     if len(keys) < 4:
-        from transformers import pipeline as summary_pipeline
         start_time = time.time()
         #text to summarize:
         if len(koboldai_vars.actions) < 5:
@@ -8389,41 +8388,7 @@ def UI_2_generate_image(data):
 
-        if koboldai_vars.summarizer is None:
-            if os.path.exists("models/{}".format(args.summarizer_model.replace('/', '_'))):
-                koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
-                koboldai_vars.summarizer = AutoModelForSeq2SeqLM.from_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
-            else:
-                koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained(args.summarizer_model, cache_dir="cache")
-                koboldai_vars.summarizer = AutoModelForSeq2SeqLM.from_pretrained(args.summarizer_model, cache_dir="cache")
-                koboldai_vars.summary_tokenizer.save_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")
-                koboldai_vars.summarizer.save_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")
-
-        #Try GPU accel
-        if koboldai_vars.hascuda and torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved(0) >= 1645778560:
-            koboldai_vars.summarizer.to(0)
-            device=0
-        else:
-            device=-1
-        summarizer = tpool.execute(summary_pipeline, task="summarization", model=koboldai_vars.summarizer, tokenizer=koboldai_vars.summary_tokenizer, device=device)
-        logger.debug("Time to load summarizer: {}".format(time.time()-start_time))
-
-        #Actual sumarization
-        start_time = time.time()
-        global old_transfomers_functions
-        temp = transformers.generation_utils.GenerationMixin._get_stopping_criteria
-        transformers.generation_utils.GenerationMixin._get_stopping_criteria = old_transfomers_functions['transformers.generation_utils.GenerationMixin._get_stopping_criteria']
-        keys = [tpool.execute(summarizer, text, max_length=args.max_summary_length, min_length=30, do_sample=False)[0]['summary_text']]
-        transformers.generation_utils.GenerationMixin._get_stopping_criteria = temp
-        logger.debug("Time to summarize: {}".format(time.time()-start_time))
-        #move model back to CPU to save precious vram
-        torch.cuda.empty_cache()
-        logger.debug("VRAM used by summarization: {}".format(torch.cuda.memory_reserved(0)))
-        koboldai_vars.summarizer.to("cpu")
-        torch.cuda.empty_cache()
-
-        logger.debug("Original Text: {}".format(text))
-        logger.debug("Summarized Text: {}".format(keys[0]))
+        keys = [summarize(text, max_length=args.max_summary_length)]
 
     art_guide = 'fantasy illustration, artstation, by jason felix by steve argyle by tyler jacobson by peter mohrbacher, cinematic lighting',
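The hunk above collapses the inline summarization block into one call to a new summarize() helper, added further down in this patch. The removed code follows a load-or-cache pattern that the helper keeps verbatim: try a locally saved copy of the model first, otherwise download it and persist a sharded copy for next time. A minimal standalone sketch of that pattern; load_summarizer and the example model name are illustrative, not part of this patch:

    import os
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

    def load_summarizer(model_name):
        # Hypothetical helper; e.g. "facebook/bart-large-cnn" -> "models/facebook_bart-large-cnn"
        local_dir = "models/{}".format(model_name.replace('/', '_'))
        if os.path.exists(local_dir):
            # Reuse the copy saved on a previous run
            tokenizer = AutoTokenizer.from_pretrained(local_dir, cache_dir="cache")
            model = AutoModelForSeq2SeqLM.from_pretrained(local_dir, cache_dir="cache")
        else:
            # First run: download, then persist a sharded copy for next time
            tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir="cache")
            model = AutoModelForSeq2SeqLM.from_pretrained(model_name, cache_dir="cache")
            tokenizer.save_pretrained(local_dir)
            model.save_pretrained(local_dir, max_shard_size="500MiB")
        return tokenizer, model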
@@ -8571,9 +8536,93 @@ def get_items_locations_from_text(text):
     print("Locations: {}".format(loc))
     print("People: {}".format(per))
 
+#==================================================================#
+# summarizer
+#==================================================================#
+def summarize(text, max_length=100, min_length=30):
+    from transformers import pipeline as summary_pipeline
+    start_time = time.time()
+    if koboldai_vars.summarizer is None:
+        if os.path.exists("models/{}".format(args.summarizer_model.replace('/', '_'))):
+            koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
+            koboldai_vars.summarizer = AutoModelForSeq2SeqLM.from_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
+        else:
+            koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained(args.summarizer_model, cache_dir="cache")
+            koboldai_vars.summarizer = AutoModelForSeq2SeqLM.from_pretrained(args.summarizer_model, cache_dir="cache")
+            koboldai_vars.summary_tokenizer.save_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")
+            koboldai_vars.summarizer.save_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")
+
+    #Try GPU accel
+    if koboldai_vars.hascuda and torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved(0) >= 1645778560:
+        koboldai_vars.summarizer.to(0)
+        device=0
+    else:
+        device=-1
+    summarizer = tpool.execute(summary_pipeline, task="summarization", model=koboldai_vars.summarizer, tokenizer=koboldai_vars.summary_tokenizer, device=device)
+    logger.debug("Time to load summarizer: {}".format(time.time()-start_time))
+
+    #Actual summarization
+    start_time = time.time()
+    global old_transfomers_functions
+    temp = transformers.generation_utils.GenerationMixin._get_stopping_criteria
+    transformers.generation_utils.GenerationMixin._get_stopping_criteria = old_transfomers_functions['transformers.generation_utils.GenerationMixin._get_stopping_criteria']
+    #make sure text is less than 1024 tokens, otherwise we'll crash
+    if len(koboldai_vars.summary_tokenizer.encode(text)) > 1000:
+        text = koboldai_vars.summary_tokenizer.decode(koboldai_vars.summary_tokenizer.encode(text)[:1000])
+    output = tpool.execute(summarizer, text, max_length=max_length, min_length=min_length, do_sample=False)[0]['summary_text']
+    transformers.generation_utils.GenerationMixin._get_stopping_criteria = temp
+    logger.debug("Time to summarize: {}".format(time.time()-start_time))
+    #move model back to CPU to save precious vram
+    torch.cuda.empty_cache()
+    logger.debug("VRAM used by summarization: {}".format(torch.cuda.memory_reserved(0)))
+    koboldai_vars.summarizer.to("cpu")
+    torch.cuda.empty_cache()
+
+    logger.debug("Original Text: {}".format(text))
+    logger.debug("Summarized Text: {}".format(output))
+
+    return output
+
+
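New in summarize() relative to the removed call site is the guard that clips the input to 1000 tokens before the pipeline call, since seq2seq summarizers like BART fail past their ~1024-token input limit. The same three lines in isolation; truncate_to_tokens is a hypothetical name, and the helper works with any Hugging Face tokenizer:

    def truncate_to_tokens(text, tokenizer, limit=1000):
        # Encode, clip to the token budget, and decode back to a string.
        # Lossy at the tail, but keeps the summarizer under its input limit.
        ids = tokenizer.encode(text)
        if len(ids) > limit:
            text = tokenizer.decode(ids[:limit])
        return text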
 #==================================================================#
 # Test
 #==================================================================#
+@app.route("/summarize")
+def request_summarize():
+    if koboldai_vars.summary_tokenizer is None:
+        koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
+    #first, let's get all of our game text and split it into sentences
+    sentences = [x[0] for x in koboldai_vars.actions.to_sentences()]
+    sentences_lengths = [len(koboldai_vars.summary_tokenizer.encode(x)) for x in sentences]
+    #remember the original input size for the report at the end
+    input_tokens = len(koboldai_vars.summary_tokenizer.encode("".join(sentences)))
+
+    while len(koboldai_vars.summary_tokenizer.encode("".join(sentences))) > 1000:
+        #Now let's split them into 1000 token chunks
+        summary_chunks = [""]
+        summary_chunk_lengths = [0]
+        for i in range(len(sentences)):
+            if summary_chunk_lengths[-1] + sentences_lengths[i] <= 1000:
+                summary_chunks[-1] += sentences[i]
+                summary_chunk_lengths[-1] += sentences_lengths[i]
+            else:
+                summary_chunks.append(sentences[i])
+                summary_chunk_lengths.append(sentences_lengths[i])
+        new_sentences = []
+        for i, summary_chunk in enumerate(summary_chunks):
+            print("summarizing chunk {}".format(i))
+            new_sentences.extend(re.split("(?<=[.!?])\s+", summarize(summary_chunk)))
+        print("Summarized to {} sentences from {}".format(len(new_sentences), len(sentences)))
+        sentences = new_sentences
+        #recompute token lengths so the next packing pass uses the shortened text
+        sentences_lengths = [len(koboldai_vars.summary_tokenizer.encode(x)) for x in sentences]
+    print("OK, doing final summarization")
+    output = summarize(" ".join(sentences))
+    print(output)
+    return "Input tokens: {}\nOutput tokens: {}\n{}".format(input_tokens,
+                                                            len(koboldai_vars.summary_tokenizer.encode(output)),
+                                                            output)
+
 @app.route("/vars")
 @logger.catch
 def show_vars():
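The /summarize route is a map-reduce pass over the story: split the game text into sentences, greedily pack them into chunks of at most 1000 tokens, summarize each chunk, and repeat until everything fits in a single summarization call. A minimal sketch of the packing step; pack_chunks is a hypothetical name, and lengths is assumed to hold the per-sentence token counts:

    def pack_chunks(sentences, lengths, limit=1000):
        # Greedy first-fit: grow the current chunk while it stays under the
        # token budget, otherwise start a new one. Mirrors the loop in the route.
        chunks, chunk_lengths = [""], [0]
        for sentence, n in zip(sentences, lengths):
            if chunk_lengths[-1] + n <= limit:
                chunks[-1] += sentence
                chunk_lengths[-1] += n
            else:
                chunks.append(sentence)
                chunk_lengths.append(n)
        return chunks

As in the route itself, a single sentence longer than the budget becomes its own oversized chunk; summarize() then truncates it to 1000 tokens.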
diff --git a/koboldai_settings.py b/koboldai_settings.py
index 038765e5..8c1006bd 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -153,47 +153,7 @@ class koboldai_vars(object):
             text += wi_text
 
-        #we're going to split our actions by sentence for better context. We'll add in which actions the sentence covers. Prompt will be added at a -1 ID
-        actions = {i: self.actions[i] for i in range(len(self.actions))}
-        actions[-1] = self.prompt
-        action_text = str(self.actions)
-        action_text = "{}{}".format(self.prompt, action_text)
-        ###########action_text_split = [sentence, actions used in sentence, token length, included in AI context]################
-        action_text_split = [[x+" ", [], 0 if self.tokenizer is None else len(self.tokenizer.encode(x+" ")), False] for x in re.split("(?<=[.!?])\s+", action_text)]
-        #The last action shouldn't have the extra space from the sentence splitting, so let's remove it
-        action_text_split[-1][0] = action_text_split[-1][0][:-1]
-        action_text_split[-1][2] = 0 if self.tokenizer is None else len(self.tokenizer.encode(action_text_split[-1][0]))
-
-        Action_Position = [-1, len(actions[-1])] #First element is the action item, second is how much text is left
-        Sentence_Position = [0, len(action_text_split[0][0])]
-        while True:
-            advance_action = False
-            advance_sentence = False
-            if Action_Position[1] <= Sentence_Position[1]:
-                #We have enough text in the sentence to completely cover the action. Advance it to the next action
-                advance_action = True
-            if Sentence_Position[1] <= Action_Position[1]:
-                advance_sentence = True
-            if Action_Position[0] not in action_text_split[Sentence_Position[0]][1]:
-                #Since this action is in the sentence, add it to the list if it's not already there
-                action_text_split[Sentence_Position[0]][1].append(Action_Position[0])
-            #Fix the text length leftovers first since they interact with each other
-            if not advance_action:
-                Action_Position[1] -= Sentence_Position[1]
-            if not advance_sentence:
-                Sentence_Position[1] -= Action_Position[1]
-
-            if advance_action:
-                Action_Position[0] += 1
-                if Action_Position[0] >= max(actions):
-                    break
-                Action_Position[1] = len(actions[Action_Position[0]])
-            if advance_sentence:
-                Sentence_Position[0] += 1
-                if Sentence_Position[0] >= len(action_text_split):
-                    break
-                Sentence_Position[1] = len(action_text_split[Sentence_Position[0]][0])
-        #OK, action_text_split now contains a list of [sentence including trailing space if needed, [action IDs that sentence includes]]
+        action_text_split = self.actions.to_sentences()
 
         #Add prompt lenght/text if we're set to always use prompt
@@ -1339,6 +1299,50 @@ class KoboldStoryRegister(object):
                 self.actions[action_id]["Options"][option_number]['Probabilities'].append(probabilities)
             process_variable_changes(self.socketio, "story", 'actions', {"id": action_id, 'action': self.actions[action_id]}, None)
 
+    def to_sentences(self):
+        #we're going to split our actions by sentence for better context. We'll add in which actions the sentence covers. Prompt will be added at a -1 ID
+        actions = {i: self.actions[i] for i in range(len(self.actions))}
+        actions[-1] = self.story_settings.prompt
+        action_text = self.__str__()
+        action_text = "{}{}".format(self.story_settings.prompt, action_text)
+        ###########action_text_split = [sentence, actions used in sentence, token length, included in AI context]################
+        action_text_split = [[x+" ", [], 0 if self.tokenizer is None else len(self.tokenizer.encode(x+" ")), False] for x in re.split("(?<=[.!?])\s+", action_text)]
+        #The last action shouldn't have the extra space from the sentence splitting, so let's remove it
+        action_text_split[-1][0] = action_text_split[-1][0][:-1]
+        action_text_split[-1][2] = 0 if self.tokenizer is None else len(self.tokenizer.encode(action_text_split[-1][0]))
+
+        Action_Position = [-1, len(actions[-1])] #First element is the action item, second is how much text is left
+        Sentence_Position = [0, len(action_text_split[0][0])]
+        while True:
+            advance_action = False
+            advance_sentence = False
+            if Action_Position[1] <= Sentence_Position[1]:
+                #We have enough text in the sentence to completely cover the action. Advance it to the next action
+                advance_action = True
+            if Sentence_Position[1] <= Action_Position[1]:
+                advance_sentence = True
+            if Action_Position[0] not in action_text_split[Sentence_Position[0]][1]:
+                #Since this action is in the sentence, add it to the list if it's not already there
+                action_text_split[Sentence_Position[0]][1].append(Action_Position[0])
+            #Fix the text length leftovers first since they interact with each other
+            if not advance_action:
+                Action_Position[1] -= Sentence_Position[1]
+            if not advance_sentence:
+                Sentence_Position[1] -= Action_Position[1]
+
+            if advance_action:
+                Action_Position[0] += 1
+                if Action_Position[0] >= max(actions):
+                    break
+                Action_Position[1] = len(actions[Action_Position[0]])
+            if advance_sentence:
+                Sentence_Position[0] += 1
+                if Sentence_Position[0] >= len(action_text_split):
+                    break
+                Sentence_Position[1] = len(action_text_split[Sentence_Position[0]][0])
+        #OK, action_text_split now contains a list of [sentence including trailing space if needed, [action IDs that sentence includes]]
+        return action_text_split
+
     def __setattr__(self, name, value):
         new_variable = name not in self.__dict__
         old_value = getattr(self, name, None)
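to_sentences() is a two-cursor walk over the same text expressed two ways: once as sentences, once as the concatenated actions (with the prompt keyed as -1). Each cursor tracks how many characters of its current item remain; every step records the current action ID on the current sentence, then advances whichever side is exhausted. A standalone sketch of that idea, with hypothetical names, and without the token counting and trailing-space fix-up the real method also performs:

    def map_sentences_to_actions(sentences, actions):
        # sentences: non-empty list of strings; actions: non-empty dict id -> text
        # whose values, concatenated in sorted-id order, equal "".join(sentences).
        ids = sorted(actions)
        result = [[sentence, []] for sentence in sentences]
        a, a_left = 0, len(actions[ids[0]])
        s, s_left = 0, len(sentences[0])
        while a < len(ids) and s < len(sentences):
            if ids[a] not in result[s][1]:
                result[s][1].append(ids[a])   # current action overlaps current sentence
            step = min(a_left, s_left)        # consume the overlapping span
            a_left -= step
            s_left -= step
            if a_left == 0:                   # action exhausted: move to the next one
                a += 1
                if a < len(ids):
                    a_left = len(actions[ids[a]])
            if s_left == 0:                   # sentence exhausted: move to the next one
                s += 1
                if s < len(sentences):
                    s_left = len(sentences[s])
        return result

Consuming min(a_left, s_left) each step reproduces the patch's advance_action/advance_sentence flags: when the remainders are equal, both cursors advance at once.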