Streaming: Rework single-gen streaming

Single-generation streaming now has its own packet and element, separate
from actions. This fixes several desync bugs and adds a smooth typing effect
(fixes https://github.com/henk717/KoboldAI/issues/263)
somebody
2023-07-23 17:32:52 -05:00
parent 3aa677ce11
commit 8de610df8c
4 changed files with 116 additions and 28 deletions
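
In miniature, the new UI2 contract looks like this from the client side (a sketch: closeStream and appendToken are placeholder names, the real handler being the stream_tokens() function in the JS diff below). The stream_tokens event carries an array of decoded token strings, and a null payload signals end of stream:

    // Hypothetical minimal consumer of the new event; everything except the
    // "stream_tokens" event name is illustrative.
    socket.on("stream_tokens", function(tokens) {
        if (tokens === null) {
            closeStream();        // server finished: tear down the stream element
            return;
        }
        appendToken(tokens[0]);   // single-gen only, so one decoded string per packet
    });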

aiserver.py

@@ -3470,6 +3470,8 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False,
             set_aibusy(0)
             emit('from_server', {'cmd': 'scrolldown', 'data': ''}, broadcast=True, room="UI_1")
             break
+    # Clean up token stream
+    emit("stream_tokens", None, broadcast=True, room="UI_2")
 def apiactionsubmit_generate(txt, minimum, maximum):
     koboldai_vars.generated_tkns = 0

gensettings.py

@@ -413,6 +413,23 @@ gensettingstf = [
 	,
 	"ui_level": 2
 	},
+	{
+	"UI_V2_Only": True,
+	"uitype": "toggle",
+	"unit": "bool",
+	"label": "Smooth Streaming",
+	"id": "smoothstreaming",
+	"min": 0,
+	"max": 1,
+	"step": 1,
+	"default": 0,
+	"tooltip": "Makes Token Streaming type in characters, not tokens. Note that this is purely visual, and will likely increase delay in seeing the tokens.",
+	"menu_path": "Interface",
+	"sub_path": "UI",
+	"classname": "user",
+	"name": "smooth_streaming",
+	"ui_level": 1
+	},
 	{
 	"uitype": "toggle",
 	"unit": "bool",

koboldai_settings.py

@@ -1155,6 +1155,7 @@ class user_settings(settings):
         self.nogenmod = False
         self.debug = False # If set to true, will send debug information to the client for display
         self.output_streaming = True
+        self.smooth_streaming = False
         self.show_probs = False # Whether or not to show token probabilities
         self.beep_on_complete = False
         self.img_gen_priority = 1
@@ -1889,34 +1890,19 @@ class KoboldStoryRegister(object):
             process_variable_changes(self._socketio, "story", 'actions', {"id": self.action_count+1, 'action': self.actions[self.action_count+1]}, None)
         else:
             #We're streaming single options so our output is our selected
-            #First we need to see if this is actually the prompt. If so we'll just not do streaming:
-            if self.story_settings.prompt != "":
-                if self.action_count+1 in self.actions:
-                    if self._koboldai_vars.tokenizer is not None:
-                        selected_text_length = len(self._koboldai_vars.tokenizer.encode(self.actions[self.action_count+1]['Selected Text']))
-                    else:
-                        selected_text_length = 0
-                    self.actions[self.action_count+1]['Selected Text'] = "{}{}".format(self.actions[self.action_count+1]['Selected Text'], text_list[0])
-                    self.actions[self.action_count+1]['Selected Text Length'] = selected_text_length
-                else:
-                    if self._koboldai_vars.tokenizer is not None:
-                        selected_text_length = len(self._koboldai_vars.tokenizer.encode(text_list[0]))
-                    else:
-                        selected_text_length = 0
-                    self.actions[self.action_count+1] = {"Selected Text": text_list[0], "Selected Text Length": selected_text_length, "Options": [], "Time": int(time.time())}
-                if self._koboldai_vars.tokenizer is not None:
-                    if len(self._koboldai_vars.tokenizer.encode(self.actions[self.action_count+1]['Selected Text'])) != self._koboldai_vars.genamt:
-                        #ui1
-                        if queue is not None:
-                            queue.put(["from_server", {"cmd": "streamtoken", "data": [{
-                                "decoded": text_list[0],
-                                "probabilities": self.probability_buffer
-                            }]}, {"broadcast":True, "room":"UI_1"}])
-                        #process_variable_changes(self._socketio, "actions", "Options", {"id": self.action_count+1, "options": self.actions[self.action_count+1]["Options"]}, {"id": self.action_count+1, "options": None})
-                        process_variable_changes(self._socketio, "story", 'actions', {"id": self.action_count+1, 'action': self.actions[self.action_count+1]}, None)
+            queue.put(["stream_tokens", text_list, {"broadcast": True, "room": "UI_2"}])
+
+            # UI1
+            queue.put([
+                "from_server", {
+                    "cmd": "streamtoken",
+                    "data": [{
+                        "decoded": text_list[0],
+                        "probabilities": self.probability_buffer
+                    }],
+                },
+                {"broadcast":True, "room": "UI_1"}
+            ])
 
     def set_probabilities(self, probabilities, action_id=None):
         self.probability_buffer = probabilities
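
Both UIs are now fed from the same queue, but the packet shapes differ: UI1 keeps the legacy from_server envelope with a streamtoken command and per-token probabilities, while UI2 gets the bare token list. Side by side, as a browser would register them (shapes taken from the queue.put calls above; handler bodies are placeholders):

    // UI1: legacy envelope, probabilities included
    socket.on("from_server", function(msg) {
        if (msg.cmd !== "streamtoken") return;
        var decoded = msg.data[0].decoded;
        var probabilities = msg.data[0].probabilities;
        // ... UI1 rendering
    });

    // UI2: plain array of decoded strings, or null to end the stream
    socket.on("stream_tokens", function(tokens) {
        // ... see stream_tokens() in the JS diff below
    });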

static/koboldai.js

@@ -37,6 +37,7 @@ socket.on("debug_message", function(data){console.log(data);});
socket.on("scratchpad_response", recieveScratchpadResponse);
socket.on("show_error_notification", function(data) { reportError(data.title, data.text) });
socket.on("generated_wi", showGeneratedWIData);
socket.on("stream_tokens", stream_tokens);
//socket.onAny(function(event_name, data) {console.log({"event": event_name, "class": data.classname, "data": data});});
// Must be done before any elements are made; we track their changes.
@@ -85,6 +86,16 @@ var initial_socketio_connection_occured = false;
 var selected_model_data;
 var privacy_mode_enabled = false;
+
+var streaming = {
+    windowOpen: false,
+    buffer: "",
+    time: {
+        msBuffer: [10],
+        preTime: null,
+    },
+    typeyTimeout: null,
+};
 
 // Each entry into this array should be an object that looks like:
 // {class: "class", key: "key", func: callback}
 let sync_hooks = [];
@@ -518,6 +529,7 @@ function process_actions_data(data) {
         game_text_scroll_timeout = setTimeout(run_infinite_scroll_update.bind(null, action_type, actions, first_action), 200);
     clearTimeout(auto_loader_timeout);
+    streaming.windowOpen = true;
 
     hide_show_prompt();
 
     //console.log("Took "+((Date.now()-start_time)/1000)+"s to process");
@@ -3348,6 +3360,77 @@ function update_game_text(id, new_text) {
 }
 
+function stream_tokens(tokens) {
+    // NOTE: This is only for genamt/batch size 1.
+    const smoothStreamingEnabled = $el("#user_smooth_streaming").checked;
+    let streamBuffer = $el("#token-stream-buffer");
+
+    if (!streaming.windowOpen) {
+        // Reject tokens sent after the streaming window is closed
+        return;
+    }
+
+    if (!tokens) {
+        // Server told us to close up shop!
+        streaming.windowOpen = false;
+        streaming.buffer = "";
+        clearTimeout(streaming.typeyTimeout);
+        streaming.typeyTimeout = null;
+        if (streamBuffer) streamBuffer.remove();
+        return;
+    }
+
+    if (!streamBuffer) {
+        // This should happen once at the beginning of the stream
+        streamBuffer = $e("span", $el(".gametext"), {
+            id: "token-stream-buffer",
+            classes: ["within_max_length"]
+        });
+    }
+
+    if (!smoothStreamingEnabled && streaming.typeyTimeout) {
+        streaming.buffer = "";
+        clearTimeout(streaming.typeyTimeout);
+        streaming.typeyTimeout = null;
+    }
+
+    if (!streaming.typeyTimeout && smoothStreamingEnabled) {
+        function _char() {
+            // Get the average time (ms) it took the last 5 tokens to generate
+            const times = streaming.time.msBuffer;
+            const avg = times.reduce((a, b) => a + b) / times.length;
+
+            if (!streaming.typeyTimeout) return;
+            if (!smoothStreamingEnabled) return;
+
+            streaming.typeyTimeout = setTimeout(_char, avg);
+
+            if (!streaming.buffer.length) return;
+            streamBuffer.textContent += streaming.buffer[0];
+            streaming.buffer = streaming.buffer.slice(1);
+        }
+        streaming.typeyTimeout = setTimeout(_char, 10);
+    }
+
+    if (!streaming.time.preTime) streaming.time.preTime = new Date();
+    streaming.time.msBuffer.push(
+        (new Date().getTime() - streaming.time.preTime.getTime()) / 5
+        // 5 chosen because Concedo said something about 5 this morning and it seems to work
+    );
+    if (streaming.time.msBuffer.length > 5) streaming.time.msBuffer.shift();
+    streaming.time.preTime = new Date();
+
+    if (smoothStreamingEnabled) {
+        streaming.buffer += tokens[0];
+    } else {
+        streamBuffer.textContent += tokens[0];
+    }
+}
+
 function save_preset() {
     socket.emit("save_new_preset", {"preset": document.getElementById("new_preset_name").value, "description": document.getElementById("new_preset_description").value});
     closePopups();
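
The cadence math in stream_tokens() amortizes token arrival time into per-character delays: every packet pushes (elapsed ms since the previous packet) / 5 into a five-entry buffer, and _char reschedules itself at the buffer's running average. A worked example with illustrative numbers:

    // If the last five tokens each arrived ~200 ms apart, each msBuffer entry
    // is 200 / 5 = 40 (the divisor approximates characters per token), so the
    // typing loop draws one buffered character roughly every 40 ms.
    const msBuffer = [40, 40, 40, 40, 40];
    const avg = msBuffer.reduce((a, b) => a + b) / msBuffer.length;
    console.log("one character every " + avg + " ms"); // 40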