From 08fac1157a7965971059271d9bd432d85c4c4539 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 1 Sep 2022 09:27:55 -0400 Subject: [PATCH] Fix for token streaming not visually showing sentence trimming correctly --- aiserver.py | 2 +- koboldai_settings.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/aiserver.py b/aiserver.py index e3627166..783ea572 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1828,7 +1828,7 @@ def patch_transformers(): #koboldai_vars.actions.stream_token(tokenizer_text, batch=batch) if koboldai_vars.output_streaming: - koboldai_vars.actions.stream_tokens([utils.decodenewlines(tokenizer.decode(x[-1])) for x in input_ids]) + koboldai_vars.actions.stream_tokens([utils.decodenewlines(tokenizer.decode(x[-1])) for x in input_ids], koboldai_vars.genamt) #if len(input_ids) > 1: # koboldai_vars.actions.clear_unused_options() # koboldai_vars.actions.append_options([utils.decodenewlines(tokenizer.decode(x[-1])) for x in input_ids]) diff --git a/koboldai_settings.py b/koboldai_settings.py index afa749b5..535e54a8 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1053,7 +1053,7 @@ class KoboldStoryRegister(object): process_variable_changes(self.socketio, "story", 'actions', {"id": key, 'action': self.actions[key]}, None) ignore = self.koboldai_vars.calc_ai_text() - def stream_tokens(self, text_list): + def stream_tokens(self, text_list, max_tokens): if len(text_list) > 1: if self.action_count+1 in self.actions: for i in range(len(text_list)): @@ -1070,8 +1070,11 @@ class KoboldStoryRegister(object): for i in range(len(text_list)): self.actions[self.action_count+1]['Options'].append({"text": text_list[i], "Pinned": False, "Previous Selection": False, "Edited": False, "Probabilities": [], "stream_id": i}) - process_variable_changes(self.socketio, "actions", "Options", {"id": self.action_count+1, "options": self.actions[self.action_count+1]["Options"]}, {"id": self.action_count+1, "options": None}) - 
process_variable_changes(self.socketio, "story", 'actions', {"id": self.action_count+1, 'action': self.actions[self.action_count+1]}, None) + #We need to see if this is the last token being streamed. If so, due to the delay it will come in AFTER the actual trimmed final text, overwriting it in the UI + if self.tokenizer is not None: + if len(self.tokenizer.encode(self.actions[self.action_count+1]["Options"][0]['text'])) != max_tokens: + #process_variable_changes(self.socketio, "actions", "Options", {"id": self.action_count+1, "options": self.actions[self.action_count+1]["Options"]}, {"id": self.action_count+1, "options": None}) + process_variable_changes(self.socketio, "story", 'actions', {"id": self.action_count+1, 'action': self.actions[self.action_count+1]}, None) else: #We're streaming single options so our output is our selected if self.tokenizer is not None: