Token Streaming Fixes for UI1

commit 04689df322 (parent a615c1a3b9)
Author: ebolam
Date: 2022-10-12 12:48:34 -04:00
3 changed files with 32 additions and 39 deletions


@@ -2148,7 +2148,11 @@ def patch_transformers():
if koboldai_vars.chatmode:
return False
koboldai_vars.actions.stream_tokens([utils.decodenewlines(tokenizer.decode(x[-1])) for x in input_ids])
data = [applyoutputformatting(utils.decodenewlines(tokenizer.decode(x[-1])), no_sentence_trimming=True) for x in input_ids]
koboldai_vars.actions.stream_tokens(data)
if koboldai_settings.queue is not None:
koboldai_settings.queue.put(["from_server", {"cmd": "streamtoken", "data": data}, {"broadcast":True, "room":"UI_1"}])
return False
class CoreStopper(StoppingCriteria):
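
A minimal sketch of what this new streaming path does, assuming stand-in names (ui1_queue, decode_last_token, format_output) for koboldai_settings.queue, the tokenizer decode, and applyoutputformatting; the real hook runs inside the transformers stopping-criteria patch above:

# Sketch: stream the newest token of each sequence to UI1 via a queue.
# ui1_queue, decode_last_token, and format_output are illustrative
# stand-ins, not the actual aiserver.py names.
import queue

ui1_queue = queue.Queue()

def decode_last_token(tokenizer, seq):
    # Decode only the most recently generated token of one sequence.
    return tokenizer.decode(seq[-1])

def on_new_tokens(tokenizer, input_ids, format_output):
    # Format each partial token the same way finished output is formatted,
    # but skip sentence trimming so nothing is cut off mid-stream.
    data = [format_output(decode_last_token(tokenizer, seq), no_sentence_trimming=True)
            for seq in input_ids]
    # Relay the formatted tokens to the UI_1 room over the socket queue.
    ui1_queue.put(["from_server",
                   {"cmd": "streamtoken", "data": data},
                   {"broadcast": True, "room": "UI_1"}])
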
@@ -6125,7 +6129,7 @@ def applyinputformatting(txt):
#==================================================================#
# Applies chosen formatting options to text returned from AI
#==================================================================#
def applyoutputformatting(txt):
def applyoutputformatting(txt, no_sentence_trimming=False):
# Use standard quotes and apostrophes
txt = utils.fixquotes(txt)
@@ -6134,7 +6138,7 @@ def applyoutputformatting(txt):
txt = koboldai_vars.acregex_ai.sub('', txt)
# Trim incomplete sentences
if(koboldai_vars.frmttriminc and not koboldai_vars.chatmode):
if(koboldai_vars.frmttriminc and not koboldai_vars.chatmode and not no_sentence_trimming):
txt = utils.trimincompletesentence(txt)
# Replace blank lines
if(koboldai_vars.frmtrmblln or koboldai_vars.chatmode):
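
An illustrative reading of the new keyword, with trim_incomplete standing in for utils.trimincompletesentence and trim_enabled for the frmttriminc setting; only the gating logic is taken from the diff:

# Sketch of the no_sentence_trimming flag's effect on output formatting.
def trim_incomplete(txt):
    # Stand-in for utils.trimincompletesentence: drop a trailing fragment.
    return txt.rsplit(".", 1)[0] + "." if "." in txt else txt

def apply_output_formatting(txt, trim_enabled=True, no_sentence_trimming=False):
    # Streamed partial tokens pass no_sentence_trimming=True so a
    # half-finished sentence is not trimmed away on every streamed update.
    if trim_enabled and not no_sentence_trimming:
        txt = trim_incomplete(txt)
    return txt

apply_output_formatting("The cave was dar", no_sentence_trimming=True)  # -> unchanged
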
@@ -7040,7 +7044,7 @@ def load_story_v1(js):
_filename = filename
if(filename.endswith('.json')):
_filename = filename[:-5]
leave_room(session['story'])
leave_room(session.get('story', 'default'))
session['story'] = _filename
join_room(_filename)
#create the story
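
The session change only guards against a missing key; a dict-based sketch of the difference (the 'default' fallback room name comes from the diff):

# Sketch: why session.get avoids the crash on a fresh session.
session = {}                            # no story room has been joined yet
# old: session['story'] raises KeyError here
room = session.get('story', 'default')  # new: falls back to the 'default' room
print(room)                             # -> "default"
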


@@ -485,8 +485,7 @@ class koboldai_vars(object):
else:
setattr(self._story_settings[self.get_story_name()], name, value)
if name == 'tokenizer':
setattr(self._story_settings[self.get_story_name()].actions, name, value)
setattr(self._story_settings[self.get_story_name()].worldinfo_v2, name, value)
self._story_settings[self.get_story_name()].worldinfo_v2.recalc_token_length(None)
setattr(self._story_settings[self.get_story_name()], name, value)
def __getattr__(self, name):
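
A reduced sketch of the setter pattern this hunk moves to, with simplified stand-in classes (the real code routes through _story_settings and get_story_name): the tokenizer is no longer copied onto each sub-object; setting it once on the vars object triggers a single world-info recalculation, and everything else reads koboldai_vars.tokenizer directly.

class WorldInfo:
    def __init__(self):
        self.entries = {1: "A dragon guards the pass."}
        self.token_lengths = {}

    def recalc_token_length(self, tokenizer):
        for uid, content in self.entries.items():
            self.token_lengths[uid] = len(tokenizer.encode(content)) if tokenizer else 0

class Vars:
    def __init__(self):
        self.worldinfo_v2 = WorldInfo()
        self.tokenizer = None

    def __setattr__(self, name, value):
        super().__setattr__(name, value)
        # Assigning the tokenizer here replaces copying it onto every
        # sub-object; token lengths are refreshed in one place.
        if name == "tokenizer" and value is not None:
            self.worldinfo_v2.recalc_token_length(value)

class _Tok:
    def encode(self, text):  # whitespace stand-in for a real tokenizer
        return text.split()

v = Vars()
v.tokenizer = _Tok()   # triggers the recalculation
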
@@ -1114,22 +1113,18 @@ class KoboldStoryRegister(object):
self.actions = {} #keys = "Selected Text", "WI Search Text", "Wi_highlighted_text", "Options", "Selected Text Length", "In AI Input", "Probabilities".
#Options being a list of dict with keys of "text", "Pinned", "Previous Selection", "Edited", "Probabilities"
self.action_count = -1
self.tokenizer = tokenizer
self.story_settings = story_settings
for item in sequence:
self.append(item)
def reset(self, sequence=[]):
self.__init__(self.socketio, self.story_settings, self.koboldai_vars, sequence=sequence, tokenizer=self.tokenizer)
self.__init__(self.socketio, self.story_settings, self.koboldai_vars, sequence=sequence)
def add_wi_to_action(action_id, key, content):
#First check to see if we have the wi_highlighted_text variable
if 'wi_highlighted_text' not in self.actions[action_id]:
self.actions[action_id]['wi_highlighted_text'] = [{"text": self.actions[action_id]['Selected Text'], "WI matches": [], "WI Text": ""}]
def __str__(self):
if len(self.actions) > 0:
return "".join([x['Selected Text'] for ignore, x in sorted(self.actions.items())])
@@ -1180,8 +1175,8 @@ class KoboldStoryRegister(object):
old = None
self.actions[i] = {"Selected Text": text, "WI Search Text": re.sub("[^0-9a-z \'\"]", "", text), "Probabilities": [], "Options": []}
if self.tokenizer is not None:
self.actions[i]['Selected Text Length'] = len(self.tokenizer.encode(text))
if self.koboldai_vars.tokenizer is not None:
self.actions[i]['Selected Text Length'] = len(self.koboldai_vars.tokenizer.encode(text))
else:
self.actions[i]['Selected Text Length'] = 0
self.actions[i]["In AI Input"] = False
@@ -1365,8 +1360,8 @@ class KoboldStoryRegister(object):
self.actions[action_step]["WI Search Text"] = re.sub("[^0-9a-z \'\"]", "", self.actions[action_step]["Selected Text"])
if 'Probabilities' in self.actions[action_step]['Options'][option_number]:
self.actions[action_step]["Probabilities"] = self.actions[action_step]['Options'][option_number]['Probabilities']
if self.tokenizer is not None:
self.actions[action_step]['Selected Text Length'] = len(self.tokenizer.encode(self.actions[action_step]['Options'][option_number]['text']))
if self.koboldai_vars.tokenizer is not None:
self.actions[action_step]['Selected Text Length'] = len(self.koboldai_vars.tokenizer.encode(self.actions[action_step]['Options'][option_number]['text']))
else:
self.actions[action_step]['Selected Text Length'] = 0
del self.actions[action_step]['Options'][option_number]
@@ -1446,10 +1441,10 @@ class KoboldStoryRegister(object):
self.story_settings.gamesaved = False
def recalc_token_length(self, action_id):
if self.tokenizer is not None:
if self.koboldai_vars.tokenizer is not None:
if action_id in self.actions:
if self.actions[action_id]['In AI Input']:
self.actions[action_id]['Selected Text Length'] = len(self.tokenizer.encode(self.actions[action_id]['Selected Text']))
self.actions[action_id]['Selected Text Length'] = len(self.koboldai_vars.tokenizer.encode(self.actions[action_id]['Selected Text']))
process_variable_changes(self.socketio, "story", 'actions', {"id": action_id, 'action': self.actions[action_id]}, None)
eventlet.sleep(0.01)
else:
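
A condensed sketch of that recalculation loop, using a plain callback and time.sleep in place of process_variable_changes and eventlet.sleep; everything else follows the hunk:

import time

def recalc_action_lengths(actions, tokenizer, emit_update):
    # Recompute each action's token length with the shared tokenizer and
    # push an incremental update per action, yielding briefly between emits.
    if tokenizer is None:
        return
    for action_id, action in actions.items():
        if action.get('In AI Input'):
            action['Selected Text Length'] = len(tokenizer.encode(action['Selected Text']))
            emit_update(action_id, action)
            time.sleep(0.01)
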
@@ -1475,8 +1470,8 @@ class KoboldStoryRegister(object):
self.actions[self.action_count+1]['Options'].append({"text": text_list[i], "Pinned": False, "Previous Selection": False, "Edited": False, "Probabilities": [], "stream_id": i})
#We need to see if this is the last token being streamed. If so, due to the delay it will come in AFTER the actual trimmed final text, overwriting it in the UI
if self.tokenizer is not None:
if len(self.tokenizer.encode(self.actions[self.action_count+1]["Options"][0]['text'])) != self.koboldai_vars.genamt:
if self.koboldai_vars.tokenizer is not None:
if len(self.koboldai_vars.tokenizer.encode(self.actions[self.action_count+1]["Options"][0]['text'])) != self.koboldai_vars.genamt:
#process_variable_changes(self.socketio, "actions", "Options", {"id": self.action_count+1, "options": self.actions[self.action_count+1]["Options"]}, {"id": self.action_count+1, "options": None})
process_variable_changes(self.socketio, "story", 'actions', {"id": self.action_count+1, 'action': self.actions[self.action_count+1]}, None)
else:
@@ -1484,15 +1479,15 @@ class KoboldStoryRegister(object):
#First we need to see if this is actually the prompt. If so we'll just not do streaming:
if self.story_settings.prompt != "":
if self.action_count+1 in self.actions:
if self.tokenizer is not None:
selected_text_length = len(self.tokenizer.encode(self.actions[self.action_count+1]['Selected Text']))
if self.koboldai_vars.tokenizer is not None:
selected_text_length = len(self.koboldai_vars.tokenizer.encode(self.actions[self.action_count+1]['Selected Text']))
else:
selected_text_length = 0
self.actions[self.action_count+1]['Selected Text'] = "{}{}".format(self.actions[self.action_count+1]['Selected Text'], text_list[0])
self.actions[self.action_count+1]['Selected Text Length'] = selected_text_length
else:
if self.tokenizer is not None:
selected_text_length = len(self.tokenizer.encode(text_list[0]))
if self.koboldai_vars.tokenizer is not None:
selected_text_length = len(self.koboldai_vars.tokenizer.encode(text_list[0]))
else:
selected_text_length = 0
self.actions[self.action_count+1] = {"Selected Text": text_list[0], "Selected Text Length": selected_text_length, "Options": []}
@@ -1500,7 +1495,7 @@ class KoboldStoryRegister(object):
if self.tokenizer is not None:
if len(self.tokenizer.encode(self.actions[self.action_count+1]['Selected Text'])) != self.koboldai_vars.genamt:
if len(self.koboldai_vars.tokenizer.encode(self.actions[self.action_count+1]['Selected Text'])) != self.koboldai_vars.genamt:
#process_variable_changes(self.socketio, "actions", "Options", {"id": self.action_count+1, "options": self.actions[self.action_count+1]["Options"]}, {"id": self.action_count+1, "options": None})
process_variable_changes(self.socketio, "story", 'actions', {"id": self.action_count+1, 'action': self.actions[self.action_count+1]}, None)
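
The guard these streaming hunks keep rewriting compares the streamed text's token count against genamt; a self-contained sketch, with a whitespace tokenizer standing in for the real one:

class FakeTokenizer:
    # Stand-in: the real object is a Hugging Face tokenizer.
    def encode(self, text):
        return text.split()

def should_push_stream_update(tokenizer, streamed_text, genamt):
    # Skip the socket update for the very last streamed chunk: it would
    # arrive after the trimmed final text and overwrite it in the UI.
    if tokenizer is None:
        return True
    return len(tokenizer.encode(streamed_text)) != genamt

should_push_stream_update(FakeTokenizer(), "one two three", genamt=3)  # -> False
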
@@ -1587,7 +1582,6 @@ class KoboldWorldInfo(object):
def __init__(self, socketio, story_settings, koboldai_vars, tokenizer=None):
self.socketio = socketio
self.tokenizer = tokenizer
self.koboldai_vars = koboldai_vars
self.world_info = {}
self.world_info_folder = OrderedDict()
@@ -1595,7 +1589,7 @@ class KoboldWorldInfo(object):
self.story_settings = story_settings
def reset(self):
self.__init__(self.socketio, self.story_settings, self.koboldai_vars, self.tokenizer)
self.__init__(self.socketio, self.story_settings, self.koboldai_vars)
def __iter__(self):
self.itter = -1
@@ -1620,14 +1614,14 @@ class KoboldWorldInfo(object):
return len(self.world_info)
def recalc_token_length(self, uid):
if self.tokenizer is not None:
if self.koboldai_vars.tokenizer is not None:
if uid is not None:
if uid in self.world_info:
self.world_info[uid]['token_length'] = len(self.tokenizer.encode(self.world_info[uid]['content']))
self.world_info[uid]['token_length'] = len(self.koboldai_vars.tokenizer.encode(self.world_info[uid]['content']))
self.socketio.emit("world_info_entry", self.world_info[uid], broadcast=True, room="UI_2")
else:
for uid in self.world_info:
self.world_info[uid]['token_length'] = len(self.tokenizer.encode(self.world_info[uid]['content']))
self.world_info[uid]['token_length'] = len(self.koboldai_vars.tokenizer.encode(self.world_info[uid]['content']))
self.send_to_ui()
else:
for uid in self.world_info:
@@ -1703,8 +1697,8 @@ class KoboldWorldInfo(object):
content = "{} ]".format(content[:-1])
else:
content = manual_text
if self.tokenizer is not None:
token_length = len(self.tokenizer.encode(content))
if self.koboldai_vars.tokenizer is not None:
token_length = len(self.koboldai_vars.tokenizer.encode(content))
else:
token_length = 0
if folder is None:
@@ -1774,8 +1768,8 @@ class KoboldWorldInfo(object):
content = "{} ]".format(content[:-1])
else:
content = manual_text
if self.tokenizer is not None:
token_length = len(self.tokenizer.encode(content))
if self.koboldai_vars.tokenizer is not None:
token_length = len(self.koboldai_vars.tokenizer.encode(content))
else:
token_length = 0
if folder is None:
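
Both world-info hunks compute an entry's token budget the same way; a minimal sketch, reading the tokenizer from the shared vars object as the diff now does:

def wi_token_length(koboldai_vars, content):
    # Token length of one world-info entry; 0 when no model is loaded yet.
    if koboldai_vars.tokenizer is None:
        return 0
    return len(koboldai_vars.tokenizer.encode(content))
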
@@ -1971,12 +1965,6 @@ class KoboldWorldInfo(object):
def get_used_wi(self):
return [x['content'] for x in self.world_info if x['used_in_game']]
def __setattr__(self, name, value):
new_variable = name not in self.__dict__
super().__setattr__(name, value)
if name == 'tokenizer' and not new_variable:
#We set the tokenizer, recalculate all of the item lengths
self.recalc_token_length(None)
badwordsids_default = [[13460], [6880], [50256], [42496], [4613], [17414], [22039], [16410], [27], [29], [38430], [37922], [15913], [24618], [28725], [58], [47175], [36937], [26700], [12878], [16471], [37981], [5218], [29795], [13412], [45160], [3693], [49778], [4211], [20598], [36475], [33409], [44167], [32406], [29847], [29342], [42669], [685], [25787], [7359], [3784], [5320], [33994], [33490], [34516], [43734], [17635], [24293], [9959], [23785], [21737], [28401], [18161], [26358], [32509], [1279], [38155], [18189], [26894], [6927], [14610], [23834], [11037], [14631], [26933], [46904], [22330], [25915], [47934], [38214], [1875], [14692], [41832], [13163], [25970], [29565], [44926], [19841], [37250], [49029], [9609], [44438], [16791], [17816], [30109], [41888], [47527], [42924], [23984], [49074], [33717], [31161], [49082], [30138], [31175], [12240], [14804], [7131], [26076], [33250], [3556], [38381], [36338], [32756], [46581], [17912], [49146]] # Tokenized array of badwords used to prevent AI artifacting
badwordsids_neox = [[0], [1], [44162], [9502], [12520], [31841], [36320], [49824], [34417], [6038], [34494], [24815], [26635], [24345], [3455], [28905], [44270], [17278], [32666], [46880], [7086], [43189], [37322], [17778], [20879], [49821], [3138], [14490], [4681], [21391], [26786], [43134], [9336], [683], [48074], [41256], [19181], [29650], [28532], [36487], [45114], [46275], [16445], [15104], [11337], [1168], [5647], [29], [27482], [44965], [43782], [31011], [42944], [47389], [6334], [17548], [38329], [32044], [35487], [2239], [34761], [7444], [1084], [12399], [18990], [17636], [39083], [1184], [35830], [28365], [16731], [43467], [47744], [1138], [16079], [40116], [45564], [18297], [42368], [5456], [18022], [42696], [34476], [23505], [23741], [39334], [37944], [45382], [38709], [33440], [26077], [43600], [34418], [36033], [6660], [48167], [48471], [15775], [19884], [41533], [1008], [31053], [36692], [46576], [20095], [20629], [31759], [46410], [41000], [13488], [30952], [39258], [16160], [27655], [22367], [42767], [43736], [49694], [13811], [12004], [46768], [6257], [37471], [5264], [44153], [33805], [20977], [21083], [25416], [14277], [31096], [42041], [18331], [33376], [22372], [46294], [28379], [38475], [1656], [5204], [27075], [50001], [16616], [11396], [7748], [48744], [35402], [28120], [41512], [4207], [43144], [14767], [15640], [16595], [41305], [44479], [38958], [18474], [22734], [30522], [46267], [60], [13976], [31830], [48701], [39822], [9014], [21966], [31422], [28052], [34607], [2479], [3851], [32214], [44082], [45507], [3001], [34368], [34758], [13380], [38363], [4299], [46802], [30996], [12630], [49236], [7082], [8795], [5218], [44740], [9686], [9983], [45301], [27114], [40125], [1570], [26997], [544], [5290], [49193], [23781], [14193], [40000], [2947], [43781], [9102], [48064], [42274], [18772], [49384], [9884], [45635], [43521], [31258], [32056], [47686], [21760], [13143], [10148], [26119], [44308], [31379], [36399], [23983], [46694], [36134], [8562], [12977], [35117], [28591], [49021], [47093], [28653], [29013], [46468], [8605], [7254], [25896], [5032], [8168], [36893], [38270], [20499], [27501], [34419], [29547], [28571], [36586], [20871], [30537], [26842], [21375], [31148], [27618], [33094], [3291], [31789], [28391], [870], [9793], [41361], [47916], [27468], [43856], [8850], [35237], [15707], [47552], [2730], [41449], [45488], [3073], [49806], [21938], [24430], [22747], [20924], [46145], [20481], [20197], [8239], [28231], [17987], [42804], [47269], [29972], [49884], [21382], [46295], [36676], [34616], [3921], [26991], [27720], [46265], [654], [9855], [40354], [5291], [34904], [44342], [2470], [14598], [880], [19282], [2498], [24237], [21431], [16369], [8994], [44524], [45662], [13663], [37077], [1447], [37786], [30863], [42854], [1019], [20322], [4398], [12159], [44072], [48664], [31547], [18736], [9259], [31], [16354], [21810], [4357], [37982], [5064], [2033], [32871], [47446], [62], [22158], [37387], [8743], [47007], [17981], [11049], [4622], [37916], [36786], [35138], [29925], [14157], [18095], [27829], [1181], [22226], [5709], [4725], [30189], [37014], [1254], [11380], [42989], [696], [24576], [39487], [30119], [1092], [8088], [2194], [9899], [14412], [21828], [3725], [13544], [5180], [44679], [34398], [3891], [28739], [14219], [37594], [49550], [11326], [6904], [17266], [5749], [10174], [23405], [9955], [38271], [41018], [13011], [48392], [36784], [24254], [21687], [23734], [5413], [41447], [45472], [10122], [17555], [15830], [47384], [12084], [31350], [47940], 
[11661], [27988], [45443], [905], [49651], [16614], [34993], [6781], [30803], [35869], [8001], [41604], [28118], [46462], [46762], [16262], [17281], [5774], [10943], [5013], [18257], [6750], [4713], [3951], [11899], [38791], [16943], [37596], [9318], [18413], [40473], [13208], [16375]]


@@ -2375,6 +2375,7 @@ $(document).ready(function(){
})();
$("body").addClass("connected");
} else if (msg.cmd == "streamtoken") {
console.log(msg);
// Sometimes the stream_token messages will come in too late, after
we have received the full text. This leads to some stray tokens
// appearing after the output. To combat this, we only allow tokens