Streaming: Rework single-gen streaming

Single-generation streaming now has its own packet and element, separate
from actions. This fixes several desync bugs and adds a smooth typing effect
(fixes https://github.com/henk717/KoboldAI/issues/263)
somebody
2023-07-23 17:32:52 -05:00
parent 3aa677ce11
commit 8de610df8c
4 changed files with 116 additions and 28 deletions
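
In miniature, the new UI2 contract looks like this from the client side (a sketch: closeStream and appendToken are placeholder names, the real handler being the stream_tokens() function in the JS diff below). The stream_tokens event carries an array of decoded token strings, and a null payload signals end of stream:

    // Hypothetical minimal consumer of the new event; everything except the
    // "stream_tokens" event name is illustrative.
    socket.on("stream_tokens", function(tokens) {
        if (tokens === null) {
            closeStream();        // server finished: tear down the stream element
            return;
        }
        appendToken(tokens[0]);   // single-gen only, so one decoded string per packet
    });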

aiserver.py

@@ -3470,6 +3470,8 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False,
             set_aibusy(0)
             emit('from_server', {'cmd': 'scrolldown', 'data': ''}, broadcast=True, room="UI_1")
             break
+    # Clean up token stream
+    emit("stream_tokens", None, broadcast=True, room="UI_2")
 def apiactionsubmit_generate(txt, minimum, maximum):
     koboldai_vars.generated_tkns = 0

gensettings.py

@@ -413,6 +413,23 @@ gensettingstf = [
 	,
 	"ui_level": 2
 	},
+	{
+	"UI_V2_Only": True,
+	"uitype": "toggle",
+	"unit": "bool",
+	"label": "Smooth Streaming",
+	"id": "smoothstreaming",
+	"min": 0,
+	"max": 1,
+	"step": 1,
+	"default": 0,
+	"tooltip": "Makes Token Streaming type in characters, not tokens. Note that this is purely visual, and will likely increase delay in seeing the tokens.",
+	"menu_path": "Interface",
+	"sub_path": "UI",
+	"classname": "user",
+	"name": "smooth_streaming",
+	"ui_level": 1
+	},
 	{
 	"uitype": "toggle",
 	"unit": "bool",

koboldai_settings.py

@@ -1155,6 +1155,7 @@ class user_settings(settings):
         self.nogenmod = False
         self.debug = False # If set to true, will send debug information to the client for display
         self.output_streaming = True
+        self.smooth_streaming = False
         self.show_probs = False # Whether or not to show token probabilities
         self.beep_on_complete = False
         self.img_gen_priority = 1
@@ -1889,34 +1890,19 @@ class KoboldStoryRegister(object):
             process_variable_changes(self._socketio, "story", 'actions', {"id": self.action_count+1, 'action': self.actions[self.action_count+1]}, None)
         else:
             #We're streaming single options so our output is our selected
-            #First we need to see if this is actually the prompt. If so we'll just not do streaming:
-            if self.story_settings.prompt != "":
-                if self.action_count+1 in self.actions:
-                    if self._koboldai_vars.tokenizer is not None:
-                        selected_text_length = len(self._koboldai_vars.tokenizer.encode(self.actions[self.action_count+1]['Selected Text']))
-                    else:
-                        selected_text_length = 0
-                    self.actions[self.action_count+1]['Selected Text'] = "{}{}".format(self.actions[self.action_count+1]['Selected Text'], text_list[0])
-                    self.actions[self.action_count+1]['Selected Text Length'] = selected_text_length
-                else:
-                    if self._koboldai_vars.tokenizer is not None:
-                        selected_text_length = len(self._koboldai_vars.tokenizer.encode(text_list[0]))
-                    else:
-                        selected_text_length = 0
-                    self.actions[self.action_count+1] = {"Selected Text": text_list[0], "Selected Text Length": selected_text_length, "Options": [], "Time": int(time.time())}
-                if self._koboldai_vars.tokenizer is not None:
-                    if len(self._koboldai_vars.tokenizer.encode(self.actions[self.action_count+1]['Selected Text'])) != self._koboldai_vars.genamt:
-                        #ui1
-                        if queue is not None:
-                            queue.put(["from_server", {"cmd": "streamtoken", "data": [{
-                                "decoded": text_list[0],
-                                "probabilities": self.probability_buffer
-                            }]}, {"broadcast":True, "room":"UI_1"}])
-                        #process_variable_changes(self._socketio, "actions", "Options", {"id": self.action_count+1, "options": self.actions[self.action_count+1]["Options"]}, {"id": self.action_count+1, "options": None})
-                        process_variable_changes(self._socketio, "story", 'actions', {"id": self.action_count+1, 'action': self.actions[self.action_count+1]}, None)
+            queue.put(["stream_tokens", text_list, {"broadcast": True, "room": "UI_2"}])
+
+            # UI1
+            queue.put([
+                "from_server", {
+                    "cmd": "streamtoken",
+                    "data": [{
+                        "decoded": text_list[0],
+                        "probabilities": self.probability_buffer
+                    }],
+                },
+                {"broadcast":True, "room": "UI_1"}
+            ])
 
     def set_probabilities(self, probabilities, action_id=None):
         self.probability_buffer = probabilities
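
Both UIs are now fed from the same queue, but the packet shapes differ: UI1 keeps the legacy from_server envelope with a streamtoken command and per-token probabilities, while UI2 gets the bare token list. Side by side, as a browser would register them (shapes taken from the queue.put calls above; handler bodies are placeholders):

    // UI1: legacy envelope, probabilities included
    socket.on("from_server", function(msg) {
        if (msg.cmd !== "streamtoken") return;
        var decoded = msg.data[0].decoded;
        var probabilities = msg.data[0].probabilities;
        // ... UI1 rendering
    });

    // UI2: plain array of decoded strings, or null to end the stream
    socket.on("stream_tokens", function(tokens) {
        // ... see stream_tokens() in the JS diff below
    });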

static/koboldai.js

@@ -37,6 +37,7 @@ socket.on("debug_message", function(data){console.log(data);});
socket.on("scratchpad_response", recieveScratchpadResponse);
socket.on("show_error_notification", function(data) { reportError(data.title, data.text) });
socket.on("generated_wi", showGeneratedWIData);
socket.on("stream_tokens", stream_tokens);
//socket.onAny(function(event_name, data) {console.log({"event": event_name, "class": data.classname, "data": data});});
// Must be done before any elements are made; we track their changes.
@@ -85,6 +86,16 @@ var initial_socketio_connection_occured = false;
 var selected_model_data;
 var privacy_mode_enabled = false;
+
+var streaming = {
+    windowOpen: false,
+    buffer: "",
+    time: {
+        msBuffer: [10],
+        preTime: null,
+    },
+    typeyTimeout: null,
+};
 
 // Each entry into this array should be an object that looks like:
 // {class: "class", key: "key", func: callback}
 let sync_hooks = [];
@@ -518,6 +529,7 @@ function process_actions_data(data) {
         game_text_scroll_timeout = setTimeout(run_infinite_scroll_update.bind(null, action_type, actions, first_action), 200);
     clearTimeout(auto_loader_timeout);
+    streaming.windowOpen = true;
 
     hide_show_prompt();
 
     //console.log("Took "+((Date.now()-start_time)/1000)+"s to process");
@@ -3348,6 +3360,77 @@ function update_game_text(id, new_text) {
 }
 
+function stream_tokens(tokens) {
+    // NOTE: This is only for genamt/batch size 1.
+    const smoothStreamingEnabled = $el("#user_smooth_streaming").checked;
+    let streamBuffer = $el("#token-stream-buffer");
+
+    if (!streaming.windowOpen) {
+        // Reject tokens sent after the streaming window is closed
+        return;
+    }
+
+    if (!tokens) {
+        // Server told us to close up shop!
+        streaming.windowOpen = false;
+        streaming.buffer = "";
+        clearTimeout(streaming.typeyTimeout);
+        streaming.typeyTimeout = null;
+        if (streamBuffer) streamBuffer.remove();
+        return;
+    }
+
+    if (!streamBuffer) {
+        // This should happen once at the beginning of the stream
+        streamBuffer = $e("span", $el(".gametext"), {
+            id: "token-stream-buffer",
+            classes: ["within_max_length"]
+        });
+    }
+
+    if (!smoothStreamingEnabled && streaming.typeyTimeout) {
+        streaming.buffer = "";
+        clearTimeout(streaming.typeyTimeout);
+        streaming.typeyTimeout = null;
+    }
+
+    if (!streaming.typeyTimeout && smoothStreamingEnabled) {
+        function _char() {
+            // Get the average time (ms) it took the last 5 tokens to generate
+            const times = streaming.time.msBuffer;
+            const avg = times.reduce((a, b) => a + b) / times.length;
+
+            if (!streaming.typeyTimeout) return;
+            if (!smoothStreamingEnabled) return;
+
+            streaming.typeyTimeout = setTimeout(_char, avg);
+
+            if (!streaming.buffer.length) return;
+            streamBuffer.textContent += streaming.buffer[0];
+            streaming.buffer = streaming.buffer.slice(1);
+        }
+        streaming.typeyTimeout = setTimeout(_char, 10);
+    }
+
+    if (!streaming.time.preTime) streaming.time.preTime = new Date();
+    streaming.time.msBuffer.push(
+        (new Date().getTime() - streaming.time.preTime.getTime()) / 5
+        // 5 chosen because Concedo said something about 5 this morning and it seems to work
+    );
+    if (streaming.time.msBuffer.length > 5) streaming.time.msBuffer.shift();
+    streaming.time.preTime = new Date();
+
+    if (smoothStreamingEnabled) {
+        streaming.buffer += tokens[0];
+    } else {
+        streamBuffer.textContent += tokens[0];
+    }
+}
+
 function save_preset() {
     socket.emit("save_new_preset", {"preset": document.getElementById("new_preset_name").value, "description": document.getElementById("new_preset_description").value});
     closePopups();
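
The cadence math in stream_tokens() amortizes token arrival time into per-character delays: every packet pushes (elapsed ms since the previous packet) / 5 into a five-entry buffer, and _char reschedules itself at the buffer's running average. A worked example with illustrative numbers:

    // If the last five tokens each arrived ~200 ms apart, each msBuffer entry
    // is 200 / 5 = 40 (the divisor approximates characters per token), so the
    // typing loop draws one buffered character roughly every 40 ms.
    const msBuffer = [40, 40, 40, 40, 40];
    const avg = msBuffer.reduce((a, b) => a + b) / msBuffer.length;
    console.log("one character every " + avg + " ms"); // 40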