From 8de610df8cc245c704151f551e6eb3007e3c52eb Mon Sep 17 00:00:00 2001
From: somebody <onesomegit@outlook.com>
Date: Sun, 23 Jul 2023 17:32:52 -0500
Subject: [PATCH] Streaming: Rework single-gen streaming

Now has its own packet and element seperate from actions. Fixes several
bugs related to desyncing. Also adds smooth typing effect
(fixes https://github.com/henk717/KoboldAI/issues/263)
---
 aiserver.py          |  2 ++
 gensettings.py       | 17 +++++++++
 koboldai_settings.py | 42 ++++++++--------------
 static/koboldai.js   | 83 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 116 insertions(+), 28 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 0583c5b8..bea0d662 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -3470,6 +3470,8 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False,
                 set_aibusy(0)
                 emit('from_server', {'cmd': 'scrolldown', 'data': ''}, broadcast=True, room="UI_1")
                 break
+    # Clean up token stream
+    emit("stream_tokens", None, broadcast=True, room="UI_2")
 
 def apiactionsubmit_generate(txt, minimum, maximum):
     koboldai_vars.generated_tkns = 0
diff --git a/gensettings.py b/gensettings.py
index 8d68b4b5..09e1abcb 100644
--- a/gensettings.py
+++ b/gensettings.py
@@ -413,6 +413,23 @@ gensettingstf = [
     ,
     "ui_level": 2
  	},
+    {
+    "UI_V2_Only": True,
+ 	"uitype": "toggle",
+ 	"unit": "bool",
+ 	"label": "Smooth Streaming",
+ 	"id": "smoothstreaming",
+ 	"min": 0,
+ 	"max": 1,
+ 	"step": 1,
+ 	"default": 0,
+	"tooltip": "Makes Token Streaming type in characters, not tokens. Note that this is purely visual, and will likely increase delay in seeing the tokens.",
+    "menu_path": "Interface",
+    "sub_path": "UI",
+    "classname": "user",
+    "name": "smooth_streaming",
+    "ui_level": 1
+ 	},
     {
  	"uitype": "toggle",
  	"unit": "bool",
diff --git a/koboldai_settings.py b/koboldai_settings.py
index ebd8c019..25f4ea84 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -1155,6 +1155,7 @@ class user_settings(settings):
         self.nogenmod    = False
         self.debug       = False    # If set to true, will send debug information to the client for display
         self.output_streaming = True
+        self.smooth_streaming = False
         self.show_probs = False # Whether or not to show token probabilities
         self.beep_on_complete = False
         self.img_gen_priority = 1
@@ -1889,34 +1890,19 @@ class KoboldStoryRegister(object):
                     process_variable_changes(self._socketio, "story", 'actions', {"id": self.action_count+1, 'action':  self.actions[self.action_count+1]}, None)
         else:
             #We're streaming single options so our output is our selected
-            #First we need to see if this is actually the prompt. If so we'll just not do streaming:
-            if self.story_settings.prompt != "":
-                if self.action_count+1 in self.actions:
-                    if self._koboldai_vars.tokenizer is not None:
-                        selected_text_length = len(self._koboldai_vars.tokenizer.encode(self.actions[self.action_count+1]['Selected Text']))
-                    else:
-                        selected_text_length = 0
-                    self.actions[self.action_count+1]['Selected Text'] = "{}{}".format(self.actions[self.action_count+1]['Selected Text'], text_list[0])
-                    self.actions[self.action_count+1]['Selected Text Length'] = selected_text_length
-                else:
-                    if self._koboldai_vars.tokenizer is not None:
-                        selected_text_length = len(self._koboldai_vars.tokenizer.encode(text_list[0]))
-                    else:
-                        selected_text_length = 0
-                    self.actions[self.action_count+1] = {"Selected Text": text_list[0], "Selected Text Length": selected_text_length, "Options": [], "Time": int(time.time())}
-                
-                
-                
-                if self._koboldai_vars.tokenizer is not None:
-                    if len(self._koboldai_vars.tokenizer.encode(self.actions[self.action_count+1]['Selected Text'])) != self._koboldai_vars.genamt:
-                        #ui1
-                        if queue is not None:
-                            queue.put(["from_server", {"cmd": "streamtoken", "data": [{
-                                "decoded": text_list[0],
-                                "probabilities": self.probability_buffer
-                            }]}, {"broadcast":True, "room":"UI_1"}])
-                        #process_variable_changes(self._socketio, "actions", "Options", {"id": self.action_count+1, "options": self.actions[self.action_count+1]["Options"]}, {"id": self.action_count+1, "options": None})
-                        process_variable_changes(self._socketio, "story", 'actions', {"id": self.action_count+1, 'action':  self.actions[self.action_count+1]}, None)
+            queue.put(["stream_tokens", text_list, {"broadcast": True, "room": "UI_2"}])
+
+            # UI1
+            queue.put([
+                "from_server", {
+                    "cmd": "streamtoken",
+                    "data": [{
+                        "decoded": text_list[0],
+                        "probabilities": self.probability_buffer
+                    }],
+                },
+                {"broadcast":True, "room": "UI_1"}
+            ])
     
     def set_probabilities(self, probabilities, action_id=None):
         self.probability_buffer = probabilities
diff --git a/static/koboldai.js b/static/koboldai.js
index 83773799..83fba5b7 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -37,6 +37,7 @@ socket.on("debug_message", function(data){console.log(data);});
 socket.on("scratchpad_response", recieveScratchpadResponse);
 socket.on("show_error_notification", function(data) { reportError(data.title, data.text) });
 socket.on("generated_wi", showGeneratedWIData);
+socket.on("stream_tokens", stream_tokens);
 //socket.onAny(function(event_name, data) {console.log({"event": event_name, "class": data.classname, "data": data});});
 
 // Must be done before any elements are made; we track their changes.
@@ -85,6 +86,16 @@ var initial_socketio_connection_occured = false;
 var selected_model_data;
 var privacy_mode_enabled = false;
 
+var streaming = {
+	windowOpen: false,
+	buffer: "",
+	time: {
+		msBuffer: [10],
+		preTime: null,
+	},
+	typeyTimeout: null,
+};
+
 // Each entry into this array should be an object that looks like:
 // {class: "class", key: "key", func: callback}
 let sync_hooks = [];
@@ -518,6 +529,7 @@ function process_actions_data(data) {
 	game_text_scroll_timeout = setTimeout(run_infinite_scroll_update.bind(null, action_type, actions, first_action), 200);
 	clearTimeout(auto_loader_timeout);
 	
+	streaming.windowOpen = true;
 	
 	hide_show_prompt();
 	//console.log("Took "+((Date.now()-start_time)/1000)+"s to process");
@@ -3348,6 +3360,77 @@ function update_game_text(id, new_text) {
 	
 }
 
+function stream_tokens(tokens) {
+	// NOTE: This is only for genamt/batch size 1.
+	const smoothStreamingEnabled = $el("#user_smooth_streaming").checked;
+
+	let streamBuffer = $el("#token-stream-buffer");
+
+	if (!streaming.windowOpen) {
+		// Reject tokens sent after the streaming window is closed
+		return;
+	}
+
+	if (!tokens) {
+		// Server told us to close up shop!
+		streaming.windowOpen = false;
+		streaming.buffer = "";
+		clearTimeout(streaming.typeyTimeout);
+		streaming.typeyTimeout = null;
+		if (streamBuffer) streamBuffer.remove();
+		return;
+	}
+
+	if (!streamBuffer) {
+		// This should happen once at the beginning of the stream
+		streamBuffer = $e("span", $el(".gametext"), {
+			id: "token-stream-buffer",
+			classes: ["within_max_length"]
+		});
+	}
+
+	if (!smoothStreamingEnabled && streaming.typeyTimeout) {
+		streaming.buffer = "";
+		clearTimeout(streaming.typeyTimeout);
+		streaming.typeyTimeout = null;
+	}
+
+	if (!streaming.typeyTimeout && smoothStreamingEnabled) {
+		function _char() {
+			const times = streaming.time.msBuffer;
+			const avg = times.reduce((a, b) => a + b) / times.length;
+			// Get the average time (ms) it took the last 5 tokens to generate
+
+			if (!streaming.typeyTimeout) return;
+			if (!smoothStreamingEnabled) return;
+			streaming.typeyTimeout = setTimeout(_char, avg);
+
+			if (!streaming.buffer.length) return;
+
+			streamBuffer.textContent += streaming.buffer[0];
+			streaming.buffer = streaming.buffer.slice(1);
+		}
+
+		streaming.typeyTimeout = setTimeout(_char, 10);
+	}
+
+	if (!streaming.time.preTime) streaming.time.preTime = new Date();
+
+	streaming.time.msBuffer.push(
+		(new Date().getTime() - streaming.time.preTime.getTime()) / 5
+		// 5 chosen because Concedo said something about 5 this morning and it seems to work
+	);
+
+	if (streaming.time.msBuffer.length > 5) streaming.time.msBuffer.shift();
+	streaming.time.preTime = new Date();
+
+	if (smoothStreamingEnabled) {
+		streaming.buffer += tokens[0];
+	} else {
+		streamBuffer.textContent += tokens[0];
+	}
+}
+
 function save_preset() {
 	socket.emit("save_new_preset", {"preset": document.getElementById("new_preset_name").value, "description": document.getElementById("new_preset_description").value});
 	closePopups();