From 04689df322445742b77b8e665eb9dc61198392c3 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 12 Oct 2022 12:48:34 -0400 Subject: [PATCH] Token Streaming Fixes for UI1 --- aiserver.py | 12 ++++++--- koboldai_settings.py | 58 +++++++++++++++++-------------------------- static/application.js | 1 + 3 files changed, 32 insertions(+), 39 deletions(-) diff --git a/aiserver.py b/aiserver.py index f435d413..f95e0dd1 100644 --- a/aiserver.py +++ b/aiserver.py @@ -2148,7 +2148,11 @@ def patch_transformers(): if koboldai_vars.chatmode: return False - koboldai_vars.actions.stream_tokens([utils.decodenewlines(tokenizer.decode(x[-1])) for x in input_ids]) + + data = [applyoutputformatting(utils.decodenewlines(tokenizer.decode(x[-1])), no_sentence_trimming=True) for x in input_ids] + koboldai_vars.actions.stream_tokens(data) + if koboldai_settings.queue is not None: + koboldai_settings.queue.put(["from_server", {"cmd": "streamtoken", "data": data}, {"broadcast":True, "room":"UI_1"}]) return False class CoreStopper(StoppingCriteria): @@ -6125,7 +6129,7 @@ def applyinputformatting(txt): #==================================================================# # Applies chosen formatting options to text returned from AI #==================================================================# -def applyoutputformatting(txt): +def applyoutputformatting(txt, no_sentence_trimming=False): # Use standard quotes and apostrophes txt = utils.fixquotes(txt) @@ -6134,7 +6138,7 @@ def applyoutputformatting(txt): txt = koboldai_vars.acregex_ai.sub('', txt) # Trim incomplete sentences - if(koboldai_vars.frmttriminc and not koboldai_vars.chatmode): + if(koboldai_vars.frmttriminc and not koboldai_vars.chatmode and not no_sentence_trimming): txt = utils.trimincompletesentence(txt) # Replace blank lines if(koboldai_vars.frmtrmblln or koboldai_vars.chatmode): @@ -7040,7 +7044,7 @@ def load_story_v1(js): _filename = filename if(filename.endswith('.json')): _filename = filename[:-5] - leave_room(session['story']) + leave_room(session.get('story', 'default')) session['story'] = _filename join_room(_filename) #create the story diff --git a/koboldai_settings.py b/koboldai_settings.py index 8a7fff70..a426a350 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -485,8 +485,7 @@ class koboldai_vars(object): else: setattr(self._story_settings[self.get_story_name()], name, value) if name == 'tokenizer': - setattr(self._story_settings[self.get_story_name()].actions, name, value) - setattr(self._story_settings[self.get_story_name()].worldinfo_v2, name, value) + self._story_settings[self.get_story_name()].worldinfo_v2.recalc_token_length(None) setattr(self._story_settings[self.get_story_name()], name, value) def __getattr__(self, name): @@ -1114,22 +1113,18 @@ class KoboldStoryRegister(object): self.actions = {} #keys = "Selected Text", "WI Search Text", "Wi_highlighted_text", "Options", "Selected Text Length", "In AI Input", "Probabilities". #Options being a list of dict with keys of "text", "Pinned", "Previous Selection", "Edited", "Probabilities" self.action_count = -1 - self.tokenizer = tokenizer self.story_settings = story_settings for item in sequence: self.append(item) def reset(self, sequence=[]): - self.__init__(self.socketio, self.story_settings, self.koboldai_vars, sequence=sequence, tokenizer=self.tokenizer) + self.__init__(self.socketio, self.story_settings, self.koboldai_vars, sequence=sequence) def add_wi_to_action(action_id, key, content): #First check to see if we have the wi_highlighted_text variable if 'wi_highlighted_text' not in self.actions[action_id]: self.actions[action_id]['wi_highlighted_text'] = [{"text": self.actions[action_id]['Selected Text'], "WI matches": [], "WI Text": ""}] - - - def __str__(self): if len(self.actions) > 0: return "".join([x['Selected Text'] for ignore, x in sorted(self.actions.items())]) @@ -1180,8 +1175,8 @@ class KoboldStoryRegister(object): old = None self.actions[i] = {"Selected Text": text, "WI Search Text": re.sub("[^0-9a-z \'\"]", "", text), "Probabilities": [], "Options": []} - if self.tokenizer is not None: - self.actions[i]['Selected Text Length'] = len(self.tokenizer.encode(text)) + if self.koboldai_vars.tokenizer is not None: + self.actions[i]['Selected Text Length'] = len(self.koboldai_vars.tokenizer.encode(text)) else: self.actions[i]['Selected Text Length'] = 0 self.actions[i]["In AI Input"] = False @@ -1365,8 +1360,8 @@ class KoboldStoryRegister(object): self.actions[action_step]["WI Search Text"] = re.sub("[^0-9a-z \'\"]", "", self.actions[action_step]["Selected Text"]) if 'Probabilities' in self.actions[action_step]['Options'][option_number]: self.actions[action_step]["Probabilities"] = self.actions[action_step]['Options'][option_number]['Probabilities'] - if self.tokenizer is not None: - self.actions[action_step]['Selected Text Length'] = len(self.tokenizer.encode(self.actions[action_step]['Options'][option_number]['text'])) + if self.koboldai_vars.tokenizer is not None: + self.actions[action_step]['Selected Text Length'] = len(self.koboldai_vars.tokenizer.encode(self.actions[action_step]['Options'][option_number]['text'])) else: self.actions[action_step]['Selected Text Length'] = 0 del self.actions[action_step]['Options'][option_number] @@ -1446,10 +1441,10 @@ class KoboldStoryRegister(object): self.story_settings.gamesaved = False def recalc_token_length(self, action_id): - if self.tokenizer is not None: + if self.koboldai_vars.tokenizer is not None: if action_id in self.actions: if self.actions[action_id]['In AI Input']: - self.actions[action_id]['Selected Text Length'] = len(self.tokenizer.encode(self.actions[action_id]['Selected Text'])) + self.actions[action_id]['Selected Text Length'] = len(self.koboldai_vars.tokenizer.encode(self.actions[action_id]['Selected Text'])) process_variable_changes(self.socketio, "story", 'actions', {"id": action_id, 'action': self.actions[action_id]}, None) eventlet.sleep(0.01) else: @@ -1475,8 +1470,8 @@ class KoboldStoryRegister(object): self.actions[self.action_count+1]['Options'].append({"text": text_list[i], "Pinned": False, "Previous Selection": False, "Edited": False, "Probabilities": [], "stream_id": i}) #We need to see if this is the last token being streamed. If so due to the rely it will come in AFTER the actual trimmed final text overwriting it in the UI - if self.tokenizer is not None: - if len(self.tokenizer.encode(self.actions[self.action_count+1]["Options"][0]['text'])) != self.koboldai_vars.genamt: + if self.koboldai_vars.tokenizer is not None: + if len(self.koboldai_vars.tokenizer.encode(self.actions[self.action_count+1]["Options"][0]['text'])) != self.koboldai_vars.genamt: #process_variable_changes(self.socketio, "actions", "Options", {"id": self.action_count+1, "options": self.actions[self.action_count+1]["Options"]}, {"id": self.action_count+1, "options": None}) process_variable_changes(self.socketio, "story", 'actions', {"id": self.action_count+1, 'action': self.actions[self.action_count+1]}, None) else: @@ -1484,15 +1479,15 @@ class KoboldStoryRegister(object): #First we need to see if this is actually the prompt. If so we'll just not do streaming: if self.story_settings.prompt != "": if self.action_count+1 in self.actions: - if self.tokenizer is not None: - selected_text_length = len(self.tokenizer.encode(self.actions[self.action_count+1]['Selected Text'])) + if self.koboldai_vars.tokenizer is not None: + selected_text_length = len(self.koboldai_vars.tokenizer.encode(self.actions[self.action_count+1]['Selected Text'])) else: selected_text_length = 0 self.actions[self.action_count+1]['Selected Text'] = "{}{}".format(self.actions[self.action_count+1]['Selected Text'], text_list[0]) self.actions[self.action_count+1]['Selected Text Length'] = selected_text_length else: - if self.tokenizer is not None: - selected_text_length = len(self.tokenizer.encode(text_list[0])) + if self.koboldai_vars.tokenizer is not None: + selected_text_length = len(self.koboldai_vars.tokenizer.encode(text_list[0])) else: selected_text_length = 0 self.actions[self.action_count+1] = {"Selected Text": text_list[0], "Selected Text Length": selected_text_length, "Options": []} @@ -1500,7 +1495,7 @@ class KoboldStoryRegister(object): if self.tokenizer is not None: - if len(self.tokenizer.encode(self.actions[self.action_count+1]['Selected Text'])) != self.koboldai_vars.genamt: + if len(self.koboldai_vars.tokenizer.encode(self.actions[self.action_count+1]['Selected Text'])) != self.koboldai_vars.genamt: #process_variable_changes(self.socketio, "actions", "Options", {"id": self.action_count+1, "options": self.actions[self.action_count+1]["Options"]}, {"id": self.action_count+1, "options": None}) process_variable_changes(self.socketio, "story", 'actions', {"id": self.action_count+1, 'action': self.actions[self.action_count+1]}, None) @@ -1587,7 +1582,6 @@ class KoboldWorldInfo(object): def __init__(self, socketio, story_settings, koboldai_vars, tokenizer=None): self.socketio = socketio - self.tokenizer = tokenizer self.koboldai_vars = koboldai_vars self.world_info = {} self.world_info_folder = OrderedDict() @@ -1595,7 +1589,7 @@ class KoboldWorldInfo(object): self.story_settings = story_settings def reset(self): - self.__init__(self.socketio, self.story_settings, self.koboldai_vars, self.tokenizer) + self.__init__(self.socketio, self.story_settings, self.koboldai_vars) def __iter__(self): self.itter = -1 @@ -1620,14 +1614,14 @@ class KoboldWorldInfo(object): return len(self.world_info) def recalc_token_length(self, uid): - if self.tokenizer is not None: + if self.koboldai_vars.tokenizer is not None: if uid is not None: if uid in self.world_info: - self.world_info[uid]['token_length'] = len(self.tokenizer.encode(self.world_info[uid]['content'])) + self.world_info[uid]['token_length'] = len(self.koboldai_vars.tokenizer.encode(self.world_info[uid]['content'])) self.socketio.emit("world_info_entry", self.world_info[uid], broadcast=True, room="UI_2") else: for uid in self.world_info: - self.world_info[uid]['token_length'] = len(self.tokenizer.encode(self.world_info[uid]['content'])) + self.world_info[uid]['token_length'] = len(self.koboldai_vars.tokenizer.encode(self.world_info[uid]['content'])) self.send_to_ui() else: for uid in self.world_info: @@ -1703,8 +1697,8 @@ class KoboldWorldInfo(object): content = "{} ]".format(content[:-1]) else: content = manual_text - if self.tokenizer is not None: - token_length = len(self.tokenizer.encode(content)) + if self.koboldai_vars.tokenizer is not None: + token_length = len(self.koboldai_vars.tokenizer.encode(content)) else: token_length = 0 if folder is None: @@ -1774,8 +1768,8 @@ class KoboldWorldInfo(object): content = "{} ]".format(content[:-1]) else: content = manual_text - if self.tokenizer is not None: - token_length = len(self.tokenizer.encode(content)) + if self.koboldai_vars.tokenizer is not None: + token_length = len(self.koboldai_vars.tokenizer.encode(content)) else: token_length = 0 if folder is None: @@ -1971,12 +1965,6 @@ class KoboldWorldInfo(object): def get_used_wi(self): return [x['content'] for x in self.world_info if x['used_in_game']] - def __setattr__(self, name, value): - new_variable = name not in self.__dict__ - super().__setattr__(name, value) - if name == 'tokenizer' and not new_variable: - #We set the tokenizer, recalculate all of the item lengths - self.recalc_token_length(None) badwordsids_default = [[13460], [6880], [50256], [42496], [4613], [17414], [22039], [16410], [27], [29], [38430], [37922], [15913], [24618], [28725], [58], [47175], [36937], [26700], [12878], [16471], [37981], [5218], [29795], [13412], [45160], [3693], [49778], [4211], [20598], [36475], [33409], [44167], [32406], [29847], [29342], [42669], [685], [25787], [7359], [3784], [5320], [33994], [33490], [34516], [43734], [17635], [24293], [9959], [23785], [21737], [28401], [18161], [26358], [32509], [1279], [38155], [18189], [26894], [6927], [14610], [23834], [11037], [14631], [26933], [46904], [22330], [25915], [47934], [38214], [1875], [14692], [41832], [13163], [25970], [29565], [44926], [19841], [37250], [49029], [9609], [44438], [16791], [17816], [30109], [41888], [47527], [42924], [23984], [49074], [33717], [31161], [49082], [30138], [31175], [12240], [14804], [7131], [26076], [33250], [3556], [38381], [36338], [32756], [46581], [17912], [49146]] # Tokenized array of badwords used to prevent AI artifacting badwordsids_neox = [[0], [1], [44162], [9502], [12520], [31841], [36320], [49824], [34417], [6038], [34494], [24815], [26635], [24345], [3455], [28905], [44270], [17278], [32666], [46880], [7086], [43189], [37322], [17778], [20879], [49821], [3138], [14490], [4681], [21391], [26786], [43134], [9336], [683], [48074], [41256], [19181], [29650], [28532], [36487], [45114], [46275], [16445], [15104], [11337], [1168], [5647], [29], [27482], [44965], [43782], [31011], [42944], [47389], [6334], [17548], [38329], [32044], [35487], [2239], [34761], [7444], [1084], [12399], [18990], [17636], [39083], [1184], [35830], [28365], [16731], [43467], [47744], [1138], [16079], [40116], [45564], [18297], [42368], [5456], [18022], [42696], [34476], [23505], [23741], [39334], [37944], [45382], [38709], [33440], [26077], [43600], [34418], [36033], [6660], [48167], [48471], [15775], [19884], [41533], [1008], [31053], [36692], [46576], [20095], [20629], [31759], [46410], [41000], [13488], [30952], [39258], [16160], [27655], [22367], [42767], [43736], [49694], [13811], [12004], [46768], [6257], [37471], [5264], [44153], [33805], [20977], [21083], [25416], [14277], [31096], [42041], [18331], [33376], [22372], [46294], [28379], [38475], [1656], [5204], [27075], [50001], [16616], [11396], [7748], [48744], [35402], [28120], [41512], [4207], [43144], [14767], [15640], [16595], [41305], [44479], [38958], [18474], [22734], [30522], [46267], [60], [13976], [31830], [48701], [39822], [9014], [21966], [31422], [28052], [34607], [2479], [3851], [32214], [44082], [45507], [3001], [34368], [34758], [13380], [38363], [4299], [46802], [30996], [12630], [49236], [7082], [8795], [5218], [44740], [9686], [9983], [45301], [27114], [40125], [1570], [26997], [544], [5290], [49193], [23781], [14193], [40000], [2947], [43781], [9102], [48064], [42274], [18772], [49384], [9884], [45635], [43521], [31258], [32056], [47686], [21760], [13143], [10148], [26119], [44308], [31379], [36399], [23983], [46694], [36134], [8562], [12977], [35117], [28591], [49021], [47093], [28653], [29013], [46468], [8605], [7254], [25896], [5032], [8168], [36893], [38270], [20499], [27501], [34419], [29547], [28571], [36586], [20871], [30537], [26842], [21375], [31148], [27618], [33094], [3291], [31789], [28391], [870], [9793], [41361], [47916], [27468], [43856], [8850], [35237], [15707], [47552], [2730], [41449], [45488], [3073], [49806], [21938], [24430], [22747], [20924], [46145], [20481], [20197], [8239], [28231], [17987], [42804], [47269], [29972], [49884], [21382], [46295], [36676], [34616], [3921], [26991], [27720], [46265], [654], [9855], [40354], [5291], [34904], [44342], [2470], [14598], [880], [19282], [2498], [24237], [21431], [16369], [8994], [44524], [45662], [13663], [37077], [1447], [37786], [30863], [42854], [1019], [20322], [4398], [12159], [44072], [48664], [31547], [18736], [9259], [31], [16354], [21810], [4357], [37982], [5064], [2033], [32871], [47446], [62], [22158], [37387], [8743], [47007], [17981], [11049], [4622], [37916], [36786], [35138], [29925], [14157], [18095], [27829], [1181], [22226], [5709], [4725], [30189], [37014], [1254], [11380], [42989], [696], [24576], [39487], [30119], [1092], [8088], [2194], [9899], [14412], [21828], [3725], [13544], [5180], [44679], [34398], [3891], [28739], [14219], [37594], [49550], [11326], [6904], [17266], [5749], [10174], [23405], [9955], [38271], [41018], [13011], [48392], [36784], [24254], [21687], [23734], [5413], [41447], [45472], [10122], [17555], [15830], [47384], [12084], [31350], [47940], [11661], [27988], [45443], [905], [49651], [16614], [34993], [6781], [30803], [35869], [8001], [41604], [28118], [46462], [46762], [16262], [17281], [5774], [10943], [5013], [18257], [6750], [4713], [3951], [11899], [38791], [16943], [37596], [9318], [18413], [40473], [13208], [16375]] diff --git a/static/application.js b/static/application.js index 3c195e94..f121903f 100644 --- a/static/application.js +++ b/static/application.js @@ -2375,6 +2375,7 @@ $(document).ready(function(){ })(); $("body").addClass("connected"); } else if (msg.cmd == "streamtoken") { + console.log(msg); // Sometimes the stream_token messages will come in too late, after // we have recieved the full text. This leads to some stray tokens // appearing after the output. To combat this, we only allow tokens