From 8494aff76c6076db125abf1ee924c940f0ed2c62 Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Fri, 15 Sep 2023 12:58:07 -0400
Subject: [PATCH 01/20] Fix for message about attention mask

---
 modeling/inference_models/hf_torch.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index fcdd9fb9..f5b5cfae 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -351,6 +351,7 @@ class HFTorchInferenceModel(HFInferenceModel):
                     bad_words_ids=self.active_badwordsids,
                     use_cache=True,
                     num_return_sequences=batch_count,
+                    pad_token_id=self.tokenizer.eos_token_id,
                 )
             else:
                  genout = self.model.generate(

From d838c966ee289b4ded912420c7c73d8b74374e55 Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Sat, 16 Sep 2023 19:41:56 -0400
Subject: [PATCH 02/20] update for tortoise tts

---
 koboldai_settings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/koboldai_settings.py b/koboldai_settings.py
index b7408b93..80bfa5c2 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -2121,7 +2121,7 @@ class KoboldStoryRegister(object):
         sample_rate = 24000
         speaker = 'train_daws'
         if self.tortoise is None and importlib.util.find_spec("tortoise") is not None:
-           self.tortoise=api.TextToSpeech()
+           self.tortoise=api.TextToSpeech(use_deepspeed=True, kv_cache=True, half=True)
         
         if importlib.util.find_spec("tortoise") is not None:
             voice_samples, conditioning_latents = load_voices([speaker])

From 92a24a9f1e029e63280e152b80ef580a76763260 Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Sat, 16 Sep 2023 20:32:46 -0400
Subject: [PATCH 03/20] Changed tortoise tts to use environment variables to
 override defaults

---
 koboldai_settings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/koboldai_settings.py b/koboldai_settings.py
index 80bfa5c2..7297c336 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -2121,7 +2121,7 @@ class KoboldStoryRegister(object):
         sample_rate = 24000
         speaker = 'train_daws'
         if self.tortoise is None and importlib.util.find_spec("tortoise") is not None:
-           self.tortoise=api.TextToSpeech(use_deepspeed=True, kv_cache=True, half=True)
+           self.tortoise=api.TextToSpeech(use_deepspeed=os.environ.get('deepspeed', "true").lower()=="true", kv_cache=os.environ.get('kv_cache', "true").lower()=="true", half=True)
         
         if importlib.util.find_spec("tortoise") is not None:
             voice_samples, conditioning_latents = load_voices([speaker])

From 81705e4318067d6726d74e4f42f5697afcc02e87 Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Sat, 16 Sep 2023 20:37:15 -0400
Subject: [PATCH 04/20] Fix for blank actions in TTS export

---
 koboldai_settings.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/koboldai_settings.py b/koboldai_settings.py
index 7297c336..ff097a76 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -2145,7 +2145,8 @@ class KoboldStoryRegister(object):
                         output = pydub.AudioSegment(np.int16(audio * 2 ** 15).tobytes(), frame_rate=sample_rate, sample_width=2, channels=channels)
                     else:
                         output = output + pydub.AudioSegment(np.int16(audio * 2 ** 15).tobytes(), frame_rate=sample_rate, sample_width=2, channels=channels)
-                output.export(filename, format="ogg", bitrate="16k")
+                if output is not None:
+                    output.export(filename, format="ogg", bitrate="16k")
                 logger.info("Slow audio took {} for {} characters".format(time.time()-start_time, text_length))
     
     def gen_all_audio(self, overwrite=False):

From 92755a03dafa0a504cb3b3f6f681089601da00ce Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Sat, 16 Sep 2023 20:51:29 -0400
Subject: [PATCH 05/20] Better text splitting for TTS

---
 koboldai_settings.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/koboldai_settings.py b/koboldai_settings.py
index ff097a76..ad819abe 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -2114,7 +2114,8 @@ class KoboldStoryRegister(object):
                         output = pydub.AudioSegment(np.int16(audio * 2 ** 15).tobytes(), frame_rate=sample_rate, sample_width=2, channels=channels)
                     else:
                         output = output + pydub.AudioSegment(np.int16(audio * 2 ** 15).tobytes(), frame_rate=sample_rate, sample_width=2, channels=channels)
-                output.export(filename, format="ogg", bitrate="16k")
+                if output is not None:
+                    output.export(filename, format="ogg", bitrate="16k")
     
     def create_wave_slow(self, make_audio_queue_slow):
         import pydub
@@ -2133,7 +2134,7 @@ class KoboldStoryRegister(object):
                 if text.strip() == "":
                     shutil.copy("data/empty_audio.ogg", filename)
                 else:
-                    if len(text) > 20000:
+                    if len(text) > 400:
                         text = self.sentence_re.findall(text)
                     else:
                         text = [text]

From b9600e432940abfde4f2b28bfec7311b7c1322a8 Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Sat, 16 Sep 2023 21:17:26 -0400
Subject: [PATCH 06/20] better text size handling for tortoise

---
 koboldai_settings.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/koboldai_settings.py b/koboldai_settings.py
index ad819abe..5a9201d7 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -2134,8 +2134,17 @@ class KoboldStoryRegister(object):
                 if text.strip() == "":
                     shutil.copy("data/empty_audio.ogg", filename)
                 else:
-                    if len(text) > 400:
+                    if len(self.tortoise.tokenizer.encode(text)) > 400:
                         text = self.sentence_re.findall(text)
+                        i=0
+                        while i <= len(text)-2:
+                            if len(self.tortoise.tokenizer.encode(text[i] + text[i+1])) < 400:
+                                text[i] = text[i] + text[i+1]
+                                del text[i+1]
+                            else:
+                                i+=1
+                                    
+                        
                     else:
                         text = [text]
                 output = None

From bb5830222d6adf5f5ab507258d018e523769e9f5 Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Wed, 27 Sep 2023 13:56:16 -0400
Subject: [PATCH 07/20] Add story double click to load

---
 static/koboldai.js | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/static/koboldai.js b/static/koboldai.js
index 038b6e87..71b0a1a0 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -1387,6 +1387,29 @@ function redrawPopup() {
 				}
 				this.parentElement.classList.add("selected");
 			};
+			td.ondblclick = function () {
+				let accept = document.getElementById("popup_accept");
+				if (this.getAttribute("valid") == "true") {
+					accept.classList.remove("disabled");
+					accept.disabled = false;
+					accept.setAttribute("selected_value", this.id);
+					socket.emit(document.getElementById("popup_accept").getAttribute("emit"), this.id);
+					closePopups();
+				} else {
+					accept.setAttribute("selected_value", "");
+					accept.classList.add("disabled");
+					accept.disabled = true;
+					if (this.getAttribute("folder") == "true") {
+						socket.emit("popup_change_folder", this.id);
+					}
+				}
+
+				let popup_list = document.getElementById('popup_list').getElementsByClassName("selected");
+				for (item of popup_list) {
+					item.classList.remove("selected");
+				}
+				this.parentElement.classList.add("selected");
+			};
 			tr.append(td);
 		}
 

From 8944f69e4bd1f6bc2509d4e1e0c423d1b0209de8 Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Wed, 27 Sep 2023 13:58:06 -0400
Subject: [PATCH 08/20] More Audio enhancements

---
 aiserver.py              | 8 +++++---
 koboldai_settings.py     | 2 ++
 templates/index_new.html | 1 +
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index b656f646..2457cb3e 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -7923,9 +7923,11 @@ def UI_2_audio():
         start_time = time.time()
         while not os.path.exists(filename) and time.time()-start_time < 60: #Waiting up to 60 seconds for the file to be generated
             time.sleep(0.1)
-    return send_file(
-             filename, 
-             mimetype="audio/ogg")
+    if os.path.exists(filename):
+        return send_file(
+                filename, 
+                mimetype="audio/ogg")
+    show_error_notification("Error generating audio chunk", f"Something happened. Maybe check the log?")
 
 
 #==================================================================#
diff --git a/koboldai_settings.py b/koboldai_settings.py
index b7408b93..aa5a39c3 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -2058,6 +2058,7 @@ class KoboldStoryRegister(object):
             if action_id is None:
                 action_id = self.action_count
 
+            logger.info("Got request to generate audio for {}".format(action_id))
             if self.tts_model is None:
                 language = 'en'
                 model_id = 'v3_en'
@@ -2071,6 +2072,7 @@ class KoboldStoryRegister(object):
             filename = os.path.join(self._koboldai_vars.save_paths.generated_audio, f"{action_id}.ogg")
             filename_slow = os.path.join(self._koboldai_vars.save_paths.generated_audio, f"{action_id}_slow.ogg")
                 
+            logger.info("Got request to generate audio for {}".format(filename))
             if overwrite or not os.path.exists(filename):
                 if action_id == -1:
                     self.make_audio_queue.put((self._koboldai_vars.prompt, filename))
diff --git a/templates/index_new.html b/templates/index_new.html
index 0f2fecaf..9bfd08dd 100644
--- a/templates/index_new.html
+++ b/templates/index_new.html
@@ -109,6 +109,7 @@
 			<span class="tts_controls hidden var_sync_alt_story_gen_audio">
 					<button type="button" class="btn action_button" style="width: 30px; padding: 0px;" onclick='play_pause_tts()' aria-label="play"><span id="play_tts" class="material-icons-outlined" style="font-size: 1.4em;">play_arrow</span></button>
 					<button type="button" class="btn action_button" style="width: 30px; padding: 0px;" onclick='stop_tts()' aria-label="play"><span id="stop_tts" class="material-icons-outlined" style="font-size: 1.4em;">stop</span></button>
+					<button type="button" class="btn action_button" style="width: 30px; padding: 0px;" onclick='download_tts()' aria-label="play"><span id="stop_tts" class="material-icons-outlined" style="font-size: 1.4em;">download</span></button>
 			</span>
 			<button type="button" class="btn action_button submit var_sync_alt_system_aibusy" system_aibusy=False id="btnsubmit" onclick="storySubmit();" context-menu="submit-button">Submit</button>
 			<button type="button" class="btn action_button submited var_sync_alt_system_aibusy"  system_aibusy=False id="btnsent"><img id="thinking" src="static/thinking.gif" class="force_center" onclick="socket.emit('abort','');"></button>

From 256da85b413de1fd80959f04ae39b333d7caff1f Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Wed, 27 Sep 2023 14:37:51 -0400
Subject: [PATCH 09/20] Updated audio to allow for download of single audio
 file with all actions in it

---
 aiserver.py          | 46 ++++++++++++++++++++++++++++++++++++++++++++
 koboldai_settings.py |  8 +++-----
 static/koboldai.js   |  7 +++++++
 3 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 2457cb3e..38600f4b 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -7929,6 +7929,52 @@ def UI_2_audio():
                 mimetype="audio/ogg")
     show_error_notification("Error generating audio chunk", f"Something happened. Maybe check the log?")
 
+#==================================================================#
+# Download complete audio file
+#==================================================================#
+@app.route("/audio_full")
+@require_allowed_ip
+@logger.catch
+def UI_2_audio_full():
+    from pydub import AudioSegment
+    if args.no_ui:
+        return redirect('/api/latest')
+    
+    combined_audio = None
+    for action_id in range(-1, koboldai_vars.actions.action_count+1):
+        filename = os.path.join(koboldai_vars.save_paths.generated_audio, f"{action_id}.ogg")
+        filename_slow = os.path.join(koboldai_vars.save_paths.generated_audio, f"{action_id}_slow.ogg")
+        complete_filename = os.path.join(koboldai_vars.save_paths.generated_audio, "complete.ogg")
+        
+        if os.path.exists(filename_slow):
+            if combined_audio is None:
+                combined_audio = AudioSegment.from_file(filename_slow, format="ogg")
+            else:
+                combined_audio = combined_audio + AudioSegment.from_file(filename_slow, format="ogg")
+        elif os.path.exists(filename):
+            if combined_audio is None:
+                combined_audio = AudioSegment.from_file(filename, format="ogg")
+            else:
+                combined_audio = combined_audio + AudioSegment.from_file(filename, format="ogg")
+        else:
+            koboldai_vars.actions.gen_audio(action_id)
+            while not os.path.exists(filename) and time.time()-start_time < 60: #Waiting up to 60 seconds for the file to be generated
+                time.sleep(0.1)
+            if os.path.exists(filename):
+                if combined_audio is None:
+                    combined_audio = AudioSegment.from_file(filename, format="ogg")
+                else:
+                    combined_audio = combined_audio + AudioSegment.from_file(filename, format="ogg")
+            else:
+                show_error_notification("Error generating audio chunk", f"Something happened. Maybe check the log?")
+        
+    file_handle = combined_audio.export(complete_filename, format="ogg")
+    
+    return send_file(
+             complete_filename, 
+             as_attachment=True,
+             download_name = koboldai_vars.story_name,
+             mimetype="audio/ogg")
 
 #==================================================================#
 # Download of the image for an action
diff --git a/koboldai_settings.py b/koboldai_settings.py
index fdcfc6b2..cbb4e0c4 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -2058,7 +2058,6 @@ class KoboldStoryRegister(object):
             if action_id is None:
                 action_id = self.action_count
 
-            logger.info("Got request to generate audio for {}".format(action_id))
             if self.tts_model is None:
                 language = 'en'
                 model_id = 'v3_en'
@@ -2072,15 +2071,14 @@ class KoboldStoryRegister(object):
             filename = os.path.join(self._koboldai_vars.save_paths.generated_audio, f"{action_id}.ogg")
             filename_slow = os.path.join(self._koboldai_vars.save_paths.generated_audio, f"{action_id}_slow.ogg")
                 
-            logger.info("Got request to generate audio for {}".format(filename))
             if overwrite or not os.path.exists(filename):
                 if action_id == -1:
                     self.make_audio_queue.put((self._koboldai_vars.prompt, filename))
                 else:
                     self.make_audio_queue.put((self.actions[action_id]['Selected Text'], filename))
-                if self.make_audio_thread_slow is None or not self.make_audio_thread_slow.is_alive():
-                    self.make_audio_thread_slow = threading.Thread(target=self.create_wave_slow, args=(self.make_audio_queue_slow, ))
-                    self.make_audio_thread_slow.start()
+                if self.make_audio_thread is None or not self.make_audio_thread.is_alive():
+                    self.make_audio_thread = threading.Thread(target=self.create_wave, args=(self.make_audio_queue, ))
+                    self.make_audio_thread.start()
             
             if overwrite or not os.path.exists(filename_slow):
                 if action_id == -1:
diff --git a/static/koboldai.js b/static/koboldai.js
index 71b0a1a0..73eece80 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -3790,6 +3790,13 @@ function stop_tts() {
 	}
 }
 
+function download_tts() {
+	var link = document.createElement("a");
+    link.download = document.getElementsByClassName("var_sync_story_story_name ")[0].text+".ogg";
+    link.href = "/audio_full";
+    link.click();
+}
+
 function finished_tts() {
 	next_action = parseInt(document.getElementById("reader").getAttribute("action_id"))+1;
 	action_count = parseInt(document.getElementById("action_count").textContent);

From e76679feb69990242c653a03eb63474fe0d3fe96 Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Wed, 27 Sep 2023 14:49:36 -0400
Subject: [PATCH 10/20] Modify split range for fast audio tts

---
 koboldai_settings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/koboldai_settings.py b/koboldai_settings.py
index cbb4e0c4..c2fb90b3 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -2099,7 +2099,7 @@ class KoboldStoryRegister(object):
             if text.strip() == "":
                 shutil.copy("data/empty_audio.ogg", filename)
             else:
-                if len(text) > 2000:
+                if len(text) > 1000:
                     text = self.sentence_re.findall(text)
                 else:
                     text = [text]

From 30110888ddc84d95dc29611e0e5dd1e98cb8b5e0 Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Wed, 27 Sep 2023 14:56:26 -0400
Subject: [PATCH 11/20] Full Audio status message

---
 aiserver.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/aiserver.py b/aiserver.py
index 38600f4b..273682ed 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -7940,6 +7940,7 @@ def UI_2_audio_full():
     if args.no_ui:
         return redirect('/api/latest')
     
+    logger.info("Downloading complete audio file")
     combined_audio = None
     for action_id in range(-1, koboldai_vars.actions.action_count+1):
         filename = os.path.join(koboldai_vars.save_paths.generated_audio, f"{action_id}.ogg")
@@ -7957,6 +7958,7 @@ def UI_2_audio_full():
             else:
                 combined_audio = combined_audio + AudioSegment.from_file(filename, format="ogg")
         else:
+            logger.info("Action {} has no audio. Generating now".format(action_id))
             koboldai_vars.actions.gen_audio(action_id)
             while not os.path.exists(filename) and time.time()-start_time < 60: #Waiting up to 60 seconds for the file to be generated
                 time.sleep(0.1)
@@ -7968,6 +7970,7 @@ def UI_2_audio_full():
             else:
                 show_error_notification("Error generating audio chunk", f"Something happened. Maybe check the log?")
         
+    logger.info("Sending audio file")
     file_handle = combined_audio.export(complete_filename, format="ogg")
     
     return send_file(

From ad8d60eaaae150cd168d819b03fc12a9748453bd Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Wed, 27 Sep 2023 15:15:46 -0400
Subject: [PATCH 12/20] Moved TTS out of experimental. Add install script for
 tortoise_tts and added omegaconf requirement for normal tts (was missed)

---
 environments/huggingface.yml | 1 +
 environments/ipex.yml        | 3 ++-
 environments/rocm.yml        | 3 ++-
 gensettings.py               | 2 --
 koboldai_settings.py         | 4 ++--
 requirements.txt             | 1 +
 requirements_mtj.txt         | 3 ++-
 7 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/environments/huggingface.yml b/environments/huggingface.yml
index 7f834906..0f6380a9 100644
--- a/environments/huggingface.yml
+++ b/environments/huggingface.yml
@@ -59,3 +59,4 @@ dependencies:
     - windows-curses; sys_platform == 'win32'
     - pynvml
     - xformers==0.0.21
+    - omegaconf
diff --git a/environments/ipex.yml b/environments/ipex.yml
index de2e3de8..45dad2b5 100644
--- a/environments/ipex.yml
+++ b/environments/ipex.yml
@@ -46,4 +46,5 @@ dependencies:
     - einops
     - peft==0.3.0
     - windows-curses; sys_platform == 'win32'
-    - pynvml
\ No newline at end of file
+    - pynvml
+    - omegaconf
\ No newline at end of file
diff --git a/environments/rocm.yml b/environments/rocm.yml
index 5b56bef7..6a405f03 100644
--- a/environments/rocm.yml
+++ b/environments/rocm.yml
@@ -45,4 +45,5 @@ dependencies:
     - einops
     - peft==0.3.0
     - windows-curses; sys_platform == 'win32'
-    - pynvml
\ No newline at end of file
+    - pynvml
+    - omegaconf
\ No newline at end of file
diff --git a/gensettings.py b/gensettings.py
index 9b69af43..d1aa22d1 100644
--- a/gensettings.py
+++ b/gensettings.py
@@ -795,8 +795,6 @@ gensettingstf = [
     "sub_path": "UI",
     "classname": "story",
     "name": "gen_audio",
-    "extra_classes": "var_sync_alt_system_experimental_features"
-    ,
     "ui_level": 1
  	},
     {
diff --git a/koboldai_settings.py b/koboldai_settings.py
index c2fb90b3..0deef220 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -2054,7 +2054,7 @@ class KoboldStoryRegister(object):
         return action_text_split
     
     def gen_audio(self, action_id=None, overwrite=True):
-        if self.story_settings.gen_audio and self._koboldai_vars.experimental_features:
+        if self.story_settings.gen_audio:
             if action_id is None:
                 action_id = self.action_count
 
@@ -2160,7 +2160,7 @@ class KoboldStoryRegister(object):
                 logger.info("Slow audio took {} for {} characters".format(time.time()-start_time, text_length))
     
     def gen_all_audio(self, overwrite=False):
-        if self.story_settings.gen_audio and self._koboldai_vars.experimental_features:
+        if self.story_settings.gen_audio:
             for i in reversed([-1]+list(self.actions.keys())):
                 self.gen_audio(i, overwrite=False)
         #else:
diff --git a/requirements.txt b/requirements.txt
index 65e68b07..39fb208b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -47,3 +47,4 @@ https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu
 https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32' and python_version == '3.8'
 windows-curses; sys_platform == 'win32'
 pynvml
+omegaconf
diff --git a/requirements_mtj.txt b/requirements_mtj.txt
index 3b95c7fe..a4466752 100644
--- a/requirements_mtj.txt
+++ b/requirements_mtj.txt
@@ -33,4 +33,5 @@ flask_compress
 ijson
 ftfy
 pydub
-sentencepiece
\ No newline at end of file
+sentencepiece
+omegaconf
\ No newline at end of file

From e7dd11d6f15d140820b62a3007cc63672d99dcea Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Wed, 27 Sep 2023 15:34:03 -0400
Subject: [PATCH 13/20] Add ffmpeg for audio tts

---
 environments/huggingface.yml | 1 +
 environments/ipex.yml        | 1 +
 environments/rocm.yml        | 1 +
 3 files changed, 3 insertions(+)

diff --git a/environments/huggingface.yml b/environments/huggingface.yml
index 0f6380a9..9f3aa495 100644
--- a/environments/huggingface.yml
+++ b/environments/huggingface.yml
@@ -26,6 +26,7 @@ dependencies:
   - termcolor
   - Pillow
   - psutil
+  - ffmpeg
   - pip:
     - flask-cloudflared==0.0.10
     - flask-ngrok
diff --git a/environments/ipex.yml b/environments/ipex.yml
index 45dad2b5..1d64bdf4 100644
--- a/environments/ipex.yml
+++ b/environments/ipex.yml
@@ -22,6 +22,7 @@ dependencies:
   - termcolor
   - Pillow
   - psutil
+  - ffmpeg
   - pip:
     - -f https://developer.intel.com/ipex-whl-stable-xpu
     - torch==2.0.1a0
diff --git a/environments/rocm.yml b/environments/rocm.yml
index 6a405f03..e1eeaab0 100644
--- a/environments/rocm.yml
+++ b/environments/rocm.yml
@@ -22,6 +22,7 @@ dependencies:
   - termcolor
   - Pillow
   - psutil
+  - ffmpeg
   - pip:
     - --extra-index-url https://download.pytorch.org/whl/rocm5.2
     - torch==1.13.1+rocm5.2

From c074f971106b166fbf3651ba56e66271c7ad73bb Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Wed, 27 Sep 2023 18:41:40 -0400
Subject: [PATCH 14/20] Working tortoise install script

---
 install_tortiose_tts.bat              | 50 +++++++++++++++++++++++++++
 install_tortiose_tts.sh               |  4 +++
 koboldai_settings.py                  |  7 +++-
 modeling/inference_models/hf_torch.py |  1 -
 4 files changed, 60 insertions(+), 2 deletions(-)
 create mode 100644 install_tortiose_tts.bat
 create mode 100755 install_tortiose_tts.sh

diff --git a/install_tortiose_tts.bat b/install_tortiose_tts.bat
new file mode 100644
index 00000000..3baf7583
--- /dev/null
+++ b/install_tortiose_tts.bat
@@ -0,0 +1,50 @@
+@echo off
+cd /D %~dp0
+
+:Isolation
+call conda deactivate 2>NUL
+set Path=%windir%\system32;%windir%;C:\Windows\System32\Wbem;%windir%\System32\WindowsPowerShell\v1.0\;%windir%\System32\OpenSSH\
+SET CONDA_SHLVL=
+SET PYTHONNOUSERSITE=1
+SET PYTHONPATH=
+
+rmdir /S /Q flask_session 2>NUL
+
+TITLE KoboldAI - Server
+SET /P M=<loader.settings
+IF %M%==1 GOTO drivemap
+IF %M%==2 GOTO subfolder
+IF %M%==3 GOTO drivemap_B
+
+:subfolder
+ECHO Runtime launching in subfolder mode
+call miniconda3\condabin\activate
+pip install git+https://github.com/neonbjb/tortoise-tts progressbar inflect librosa rotary-embedding-torch unidecode lazy_loader llvmlite numba joblib decorator audioread msgpack pooch scikit-learn soundfile soxr platformdirs threadpoolctl pydantic-core annotated-types pydantic --no-dependencies
+pip install torchaudio --index-url https://download.pytorch.org/whl/cu118
+cmd /k
+pause
+exit
+
+:drivemap
+ECHO Runtime launching in K: drive mode
+subst /D K: >nul
+subst K: miniconda3 >nul
+call K:\python\condabin\activate
+pip install git+https://github.com/neonbjb/tortoise-tts progressbar inflect librosa rotary-embedding-torch unidecode lazy_loader llvmlite numba joblib decorator audioread msgpack pooch scikit-learn soundfile soxr platformdirs threadpoolctl pydantic-core annotated-types pydantic --no-dependencies
+pip install torchaudio --index-url https://download.pytorch.org/whl/cu118
+pip install -r requirements.txt --no-dependencies
+cmd /k
+pause
+exit
+
+:drivemap_B
+ECHO Runtime launching in B: drive mode
+subst /D B: >nul
+subst B: miniconda3 >nul
+call B:\python\condabin\activate
+pip install git+https://github.com/neonbjb/tortoise-tts progressbar inflect librosa rotary-embedding-torch unidecode lazy_loader llvmlite numba joblib decorator audioread msgpack pooch scikit-learn soundfile soxr platformdirs threadpoolctl pydantic-core annotated-types pydantic --no-dependencies
+pip install torchaudio --index-url https://download.pytorch.org/whl/cu118
+pip install -r requirements.txt --no-dependencies
+cmd /k
+pause
+exit
\ No newline at end of file
diff --git a/install_tortiose_tts.sh b/install_tortiose_tts.sh
new file mode 100755
index 00000000..1a978ab4
--- /dev/null
+++ b/install_tortiose_tts.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+bin/micromamba run -r runtime -n koboldai pip install git+https://github.com/neonbjb/tortoise-tts OmegaConf deepspeed
+bin/micromamba run -r runtime -n koboldai pip install torchaudio --index-url https://download.pytorch.org/whl/cu118
+bin/micromamba run -r runtime -n koboldai pip install -r requirements.txt --no-dependencies
diff --git a/koboldai_settings.py b/koboldai_settings.py
index 0deef220..6f18cace 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -21,6 +21,7 @@ queue = None
 multi_story = False
 global enable_whitelist
 enable_whitelist = False
+slow_tts_message_shown = False
 
 if importlib.util.find_spec("tortoise") is not None:
     from tortoise import api
@@ -2119,10 +2120,14 @@ class KoboldStoryRegister(object):
     
     def create_wave_slow(self, make_audio_queue_slow):
         import pydub
+        global slow_tts_message_shown
         sample_rate = 24000
         speaker = 'train_daws'
+        if importlib.util.find_spec("tortoise") is None and not slow_tts_message_shown:
+            logger.info("Disabling slow (and higher quality) tts as it's not installed")
+            slow_tts_message_shown=True
         if self.tortoise is None and importlib.util.find_spec("tortoise") is not None:
-           self.tortoise=api.TextToSpeech(use_deepspeed=os.environ.get('deepspeed', "true").lower()=="true", kv_cache=os.environ.get('kv_cache', "true").lower()=="true", half=True)
+           self.tortoise=api.TextToSpeech(use_deepspeed=os.environ.get('deepspeed', "false").lower()=="true", kv_cache=os.environ.get('kv_cache', "true").lower()=="true", half=True)
         
         if importlib.util.find_spec("tortoise") is not None:
             voice_samples, conditioning_latents = load_voices([speaker])
diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index f5b5cfae..fcdd9fb9 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -351,7 +351,6 @@ class HFTorchInferenceModel(HFInferenceModel):
                     bad_words_ids=self.active_badwordsids,
                     use_cache=True,
                     num_return_sequences=batch_count,
-                    pad_token_id=self.tokenizer.eos_token_id,
                 )
             else:
                  genout = self.model.generate(

From b3f8f333c29f2e9e6838ae468fe9a9d6f6f72c4e Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Thu, 28 Sep 2023 10:22:18 -0400
Subject: [PATCH 15/20] Added additional logging

---
 koboldai_settings.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/koboldai_settings.py b/koboldai_settings.py
index 6f18cace..f1ea538e 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -2166,6 +2166,7 @@ class KoboldStoryRegister(object):
     
     def gen_all_audio(self, overwrite=False):
         if self.story_settings.gen_audio:
+            logger.info("Generating audio for any missing actions")
             for i in reversed([-1]+list(self.actions.keys())):
                 self.gen_audio(i, overwrite=False)
         #else:

From 52a2b650b7f6878632d86881afb5bd24c672bf16 Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Thu, 28 Sep 2023 11:43:21 -0400
Subject: [PATCH 16/20] Added better feedback for downloading audio file since
 it can take some time to combine the chunks.

---
 aiserver.py              | 31 ++++++++++++++++++++-----------
 static/koboldai.js       | 16 ++++++++++++----
 templates/index_new.html |  2 +-
 3 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 273682ed..0f064872 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -7932,20 +7932,19 @@ def UI_2_audio():
 #==================================================================#
 # Download complete audio file
 #==================================================================#
-@app.route("/audio_full")
-@require_allowed_ip
-@logger.catch
-def UI_2_audio_full():
+@socketio.on("gen_full_audio")
+def UI_2_gen_full_audio(data):
     from pydub import AudioSegment
     if args.no_ui:
         return redirect('/api/latest')
     
-    logger.info("Downloading complete audio file")
+    logger.info("Generating complete audio file")
     combined_audio = None
+    complete_filename = os.path.join(koboldai_vars.save_paths.generated_audio, "complete.ogg")
     for action_id in range(-1, koboldai_vars.actions.action_count+1):
         filename = os.path.join(koboldai_vars.save_paths.generated_audio, f"{action_id}.ogg")
         filename_slow = os.path.join(koboldai_vars.save_paths.generated_audio, f"{action_id}_slow.ogg")
-        complete_filename = os.path.join(koboldai_vars.save_paths.generated_audio, "complete.ogg")
+        
         
         if os.path.exists(filename_slow):
             if combined_audio is None:
@@ -7972,12 +7971,22 @@ def UI_2_audio_full():
         
     logger.info("Sending audio file")
     file_handle = combined_audio.export(complete_filename, format="ogg")
+    return True
     
-    return send_file(
-             complete_filename, 
-             as_attachment=True,
-             download_name = koboldai_vars.story_name,
-             mimetype="audio/ogg")
+
+@app.route("/audio_full")
+@require_allowed_ip
+@logger.catch
+def UI_2_audio_full():
+    """Send the combined story audio (complete.ogg) as a download, or answer 404 if it does not exist."""
+    logger.info("Downloading complete audio file")
+    complete_filename = os.path.join(koboldai_vars.save_paths.generated_audio, "complete.ogg")
+    if not os.path.exists(complete_filename):
+        # Falling through would return None, which Flask rejects as an invalid response (HTTP 500).
+        return "Complete audio file has not been generated", 404
+    return send_file(complete_filename, as_attachment=True,
+                     download_name=koboldai_vars.story_name,
+                     mimetype="audio/ogg")
 
 #==================================================================#
 # Download of the image for an action
diff --git a/static/koboldai.js b/static/koboldai.js
index 73eece80..971e1788 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -3791,10 +3791,18 @@ function stop_tts() {
 }
 
 function download_tts() {
-	var link = document.createElement("a");
-    link.download = document.getElementsByClassName("var_sync_story_story_name ")[0].text+".ogg";
-    link.href = "/audio_full";
-    link.click();
+	document.getElementById("download_tts").innerText = "hourglass_empty";
+	socket.emit("gen_full_audio", {}, download_actual_file_tts);
+}
+
+function download_actual_file_tts(data) {
+	// Restore the button icon even when the server reports failure, so it never sticks on the hourglass.
+	document.getElementById("download_tts").innerText = "download";
+	if (!data) return;
+	var link = document.createElement("a");
+	link.download = document.getElementsByClassName("var_sync_story_story_name ")[0].text+".ogg";
+	link.href = "/audio_full";
+	link.click();
 }
 
 function finished_tts() {
diff --git a/templates/index_new.html b/templates/index_new.html
index 9bfd08dd..e1931078 100644
--- a/templates/index_new.html
+++ b/templates/index_new.html
@@ -109,7 +109,7 @@
 			<span class="tts_controls hidden var_sync_alt_story_gen_audio">
 					<button type="button" class="btn action_button" style="width: 30px; padding: 0px;" onclick='play_pause_tts()' aria-label="play"><span id="play_tts" class="material-icons-outlined" style="font-size: 1.4em;">play_arrow</span></button>
 					<button type="button" class="btn action_button" style="width: 30px; padding: 0px;" onclick='stop_tts()' aria-label="play"><span id="stop_tts" class="material-icons-outlined" style="font-size: 1.4em;">stop</span></button>
-					<button type="button" class="btn action_button" style="width: 30px; padding: 0px;" onclick='download_tts()' aria-label="play"><span id="stop_tts" class="material-icons-outlined" style="font-size: 1.4em;">download</span></button>
+					<button type="button" class="btn action_button" style="width: 30px; padding: 0px;" onclick='download_tts()' aria-label="play"><span id="download_tts" class="material-icons-outlined" style="font-size: 1.4em;">download</span></button>
 			</span>
 			<button type="button" class="btn action_button submit var_sync_alt_system_aibusy" system_aibusy=False id="btnsubmit" onclick="storySubmit();" context-menu="submit-button">Submit</button>
 			<button type="button" class="btn action_button submited var_sync_alt_system_aibusy"  system_aibusy=False id="btnsent"><img id="thinking" src="static/thinking.gif" class="force_center" onclick="socket.emit('abort','');"></button>

From a7159392377c6937bb5d6b2f7da1cdbbecce446a Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Sat, 30 Sep 2023 16:55:39 -0400
Subject: [PATCH 17/20] Added audio status bar to show which actions have audio
 generated and under what form (yellow for quick, green for slow, red for
 missing). Adjustable in CSS themes.

---
 koboldai_settings.py     | 65 +++++++++++++++++++++++++++++++++++-----
 static/koboldai.css      | 45 +++++++++++++++++++++++++++-
 static/koboldai.js       | 24 +++++++++++++++
 templates/index_new.html |  4 +++
 4 files changed, 129 insertions(+), 9 deletions(-)

diff --git a/koboldai_settings.py b/koboldai_settings.py
index f1ea538e..6a7ef81c 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -101,6 +101,28 @@ def process_variable_changes(socketio, classname, name, value, old_value, debug_
                         else:
                             socketio.emit("var_changed", {"classname": classname, "name": name, "old_value":  "*" * len(old_value) if old_value is not None else "", "value": "*" * len(value) if value is not None else "", "transmit_time": transmit_time}, include_self=True, broadcast=True, room=room)
 
+def basic_send(socketio, classname, event, data):
+    #Emit `event` with `data` to a room picked from `classname`; the event is queued when outside a request context
+    global multi_story
+    if multi_story:
+        if classname != 'story':
+            room = 'UI_2'
+        else:
+            if has_request_context():
+                room = 'default' if 'story' not in session else session['story']  #room named after the session's story
+            else:
+                logger.error("We tried to access the story register outside of an http context. Will not work in multi-story mode")
+                return
+    else:
+        room = "UI_2"
+    if not has_request_context():
+        if queue is not None:
+            #No request context: hand the event to the `queue` global instead of emitting directly
+            queue.put([event, data, {"broadcast":True, "room":room}])
+    else:
+        if socketio is not None:
+            socketio.emit(event, data, include_self=True, broadcast=True, room=room)
+
 class koboldai_vars(object):
     def __init__(self, socketio):
         self._model_settings = model_settings(socketio, self)
@@ -1419,6 +1441,7 @@ class KoboldStoryRegister(object):
         self.make_audio_thread_slow = None
         self.make_audio_queue_slow = multiprocessing.Queue()
         self.probability_buffer = None
+        self.audio_status = {}
         for item in sequence:
             self.append(item)
     
@@ -1576,6 +1599,13 @@ class KoboldStoryRegister(object):
 
             if "Original Text" not in json_data["actions"][item]:
                 json_data["actions"][item]["Original Text"] = json_data["actions"][item]["Selected Text"]
+                
+            if "audio_gen" not in json_data["actions"][item]:
+                json_data["actions"][item]["audio_gen"] = 0
+                
+            if "image_gen" not in json_data["actions"][item]:
+                json_data["actions"][item]["image_gen"] = False
+
 
             temp[int(item)] = json_data['actions'][item]
             if int(item) >= self.action_count-100: #sending last 100 items to UI
@@ -2055,9 +2085,14 @@ class KoboldStoryRegister(object):
         return action_text_split
     
     def gen_audio(self, action_id=None, overwrite=True):
+        if action_id is None:
+            action_id = self.action_count
+        if overwrite:
+            if action_id != -1:
+                self.actions[action_id]["audio_gen"] = 0
+                basic_send(self._socketio, "story", "set_audio_status", {"id": action_id, "action": self.actions[action_id]})
         if self.story_settings.gen_audio:
-            if action_id is None:
-                action_id = self.action_count
+            
 
             if self.tts_model is None:
                 language = 'en'
@@ -2074,28 +2109,36 @@ class KoboldStoryRegister(object):
                 
             if overwrite or not os.path.exists(filename):
                 if action_id == -1:
-                    self.make_audio_queue.put((self._koboldai_vars.prompt, filename))
+                    self.make_audio_queue.put((self._koboldai_vars.prompt, filename, action_id))
                 else:
-                    self.make_audio_queue.put((self.actions[action_id]['Selected Text'], filename))
+                    self.make_audio_queue.put((self.actions[action_id]['Selected Text'], filename, action_id))
                 if self.make_audio_thread is None or not self.make_audio_thread.is_alive():
                     self.make_audio_thread = threading.Thread(target=self.create_wave, args=(self.make_audio_queue, ))
                     self.make_audio_thread.start()
+            elif not overwrite and os.path.exists(filename):
+                if action_id != -1:
+                    self.actions[action_id]["audio_gen"] = 1
             
             if overwrite or not os.path.exists(filename_slow):
                 if action_id == -1:
-                    self.make_audio_queue_slow.put((self._koboldai_vars.prompt, filename_slow))
+                    self.make_audio_queue_slow.put((self._koboldai_vars.prompt, filename_slow, action_id))
                 else:
-                    self.make_audio_queue_slow.put((self.actions[action_id]['Selected Text'], filename_slow))
+                    self.make_audio_queue_slow.put((self.actions[action_id]['Selected Text'], filename_slow, action_id))
                 if self.make_audio_thread_slow is None or not self.make_audio_thread_slow.is_alive():
                     self.make_audio_thread_slow = threading.Thread(target=self.create_wave_slow, args=(self.make_audio_queue_slow, ))
                     self.make_audio_thread_slow.start()
+            elif not overwrite and os.path.exists(filename_slow):
+                if action_id != -1:
+                    self.actions[action_id]["audio_gen"] = 2
+                    basic_send(self._socketio, "story", "set_audio_status", {"id": action_id, "action": self.actions[action_id]})
+                    
                 
     def create_wave(self, make_audio_queue):
         import pydub
         sample_rate = 24000
         speaker = 'en_5'
         while not make_audio_queue.empty():
-            (text, filename) = make_audio_queue.get()
+            (text, filename, action_id) = make_audio_queue.get()
             logger.info("Creating audio for {}".format(os.path.basename(filename)))
             if text.strip() == "":
                 shutil.copy("data/empty_audio.ogg", filename)
@@ -2117,6 +2160,9 @@ class KoboldStoryRegister(object):
                         output = output + pydub.AudioSegment(np.int16(audio * 2 ** 15).tobytes(), frame_rate=sample_rate, sample_width=2, channels=channels)
                 if output is not None:
                     output.export(filename, format="ogg", bitrate="16k")
+            if action_id != -1 and self.actions[action_id]["audio_gen"] == 0:
+                self.actions[action_id]["audio_gen"] = 1
+                basic_send(self._socketio, "story", "set_audio_status", {"id": action_id, "action": self.actions[action_id]})
     
     def create_wave_slow(self, make_audio_queue_slow):
         import pydub
@@ -2133,7 +2179,7 @@ class KoboldStoryRegister(object):
             voice_samples, conditioning_latents = load_voices([speaker])
             while not make_audio_queue_slow.empty():
                 start_time = time.time()
-                (text, filename) = make_audio_queue_slow.get()
+                (text, filename, action_id) = make_audio_queue_slow.get()
                 text_length = len(text)
                 logger.info("Creating audio for {}".format(os.path.basename(filename)))
                 if text.strip() == "":
@@ -2162,6 +2208,9 @@ class KoboldStoryRegister(object):
                         output = output + pydub.AudioSegment(np.int16(audio * 2 ** 15).tobytes(), frame_rate=sample_rate, sample_width=2, channels=channels)
                 if output is not None:
                     output.export(filename, format="ogg", bitrate="16k")
+                if action_id != -1:
+                    self.actions[action_id]["audio_gen"] = 2
+                    basic_send(self._socketio, "story", "set_audio_status", {"id": action_id, "action": self.actions[action_id]})
                 logger.info("Slow audio took {} for {} characters".format(time.time()-start_time, text_length))
     
     def gen_all_audio(self, overwrite=False):
diff --git a/static/koboldai.css b/static/koboldai.css
index a419a4f3..9ee96976 100644
--- a/static/koboldai.css
+++ b/static/koboldai.css
@@ -1932,7 +1932,7 @@ body {
 }
 
 .tts_controls.hidden[story_gen_audio="true"] {
-	display: inherit !important; 
+	display: flex !important; 
 }
 
 .inputrow .tts_controls {
@@ -1942,6 +1942,49 @@ body {
 	width: 100%;
 	text-align: center;
 	overflow: hidden;
+	flex-direction: row;
+}
+.inputrow .tts_controls div {
+	padding: 0px;
+	height: 100%;
+	width: 100%;
+	text-align: center;
+	overflow: hidden;
+	display: flex;
+	flex-direction: column;
+	/*flex-basis: 100%;*/
+}
+
+.audio_status_action {
+	flex-basis: 100%;
+}
+
+.audio_status_action[status="2"] {
+	background-color: green;
+}
+
+.audio_status_action[status="1"] {
+	background-color: yellow;
+}
+
+.audio_status_action[status="0"] {
+	background-color: red;
+}
+
+.audio_status_action[status="-1"] {
+	display: none;
+}
+
+.inputrow .tts_controls div button {
+	flex-basis: 100%;
+}
+
+.inputrow .tts_controls .audio_status {
+	padding: 0px;
+	height: 100%;
+	width: 2px;
+	display: flex;
+	flex-direction: column;
 }
 
 .inputrow .back {
diff --git a/static/koboldai.js b/static/koboldai.js
index 971e1788..fa0180dc 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -45,6 +45,7 @@ socket.on("show_error_notification", function(data) { reportError(data.title, da
 socket.on("generated_wi", showGeneratedWIData);
 socket.on("stream_tokens", stream_tokens);
 socket.on("show_options", show_options);
+socket.on("set_audio_status", set_audio_status);
 //socket.onAny(function(event_name, data) {console.log({"event": event_name, "class": data.classname, "data": data});});
 
 // Must be done before any elements are made; we track their changes.
@@ -601,6 +602,7 @@ function create_options(action) {
 
 function process_actions_data(data) {
 	start_time = Date.now();
+	console.log(data);
 	if (Array.isArray(data.value)) {
 		actions = data.value;
 	} else {
@@ -637,6 +639,7 @@ function process_actions_data(data) {
 		actions_data[parseInt(action.id)] = action.action;
 		do_story_text_updates(action);
 		create_options(action);
+		set_audio_status(action);
 	}
 	
 	clearTimeout(game_text_scroll_timeout);
@@ -648,6 +651,27 @@ function process_actions_data(data) {
 	
 }
 
+function set_audio_status(action) {
+	if (!('audio_gen' in action.action)) {
+		action.action.audio_gen = 0;
+	}
+	if (!(document.getElementById("audio_gen_status_"+action.id))) {
+		// First time this action is seen: create its marker, hidden (status -1) until the real status is set below.
+		let sp = document.createElement("SPAN");
+		sp.id = "audio_gen_status_"+action.id;
+		sp.classList.add("audio_status_action");
+		sp.setAttribute("status", -1);
+		document.getElementById("audio_status").appendChild(sp);
+	}
+	document.getElementById("audio_gen_status_"+action.id).setAttribute("status", action.action.audio_gen);
+	//Hide the marker for empty actions (status -1 is display:none in the stylesheet)
+	if (action.action['Selected Text'] == "") {
+		console.log("disabling status");
+		document.getElementById("audio_gen_status_"+action.id).setAttribute("status", -1);
+	}
+	console.log("Setting " + action.id + " to " + action.action.audio_gen);
+}
+
 function parseChatMessages(text) {
 	let messages = [];
 
diff --git a/templates/index_new.html b/templates/index_new.html
index e1931078..920471ec 100644
--- a/templates/index_new.html
+++ b/templates/index_new.html
@@ -107,9 +107,13 @@
 				</div>
 			</div><br>
 			<span class="tts_controls hidden var_sync_alt_story_gen_audio">
+				<div class="audio_status" id="audio_status">
+				</div>
+				<div>
 					<button type="button" class="btn action_button" style="width: 30px; padding: 0px;" onclick='play_pause_tts()' aria-label="play"><span id="play_tts" class="material-icons-outlined" style="font-size: 1.4em;">play_arrow</span></button>
 					<button type="button" class="btn action_button" style="width: 30px; padding: 0px;" onclick='stop_tts()' aria-label="play"><span id="stop_tts" class="material-icons-outlined" style="font-size: 1.4em;">stop</span></button>
 					<button type="button" class="btn action_button" style="width: 30px; padding: 0px;" onclick='download_tts()' aria-label="play"><span id="download_tts" class="material-icons-outlined" style="font-size: 1.4em;">download</span></button>
+				</div>
 			</span>
 			<button type="button" class="btn action_button submit var_sync_alt_system_aibusy" system_aibusy=False id="btnsubmit" onclick="storySubmit();" context-menu="submit-button">Submit</button>
 			<button type="button" class="btn action_button submited var_sync_alt_system_aibusy"  system_aibusy=False id="btnsent"><img id="thinking" src="static/thinking.gif" class="force_center" onclick="socket.emit('abort','');"></button>

From 7a3fc2633616e1c6342ac03c1244230ed4e96f39 Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Sun, 1 Oct 2023 10:11:23 -0400
Subject: [PATCH 18/20] Images now change based on what action you're on. Can
 regenerate images for old actions.

---
 aiserver.py              | 13 +++++++++++
 static/koboldai.js       | 50 +++++++++++++++++++++++++++++++++++-----
 templates/index_new.html |  2 +-
 3 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 0f064872..99ce3979 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -7588,6 +7588,19 @@ def UI2_clear_generated_image(data):
     koboldai_vars.picture = ""
     koboldai_vars.picture_prompt = ""
 
+#==================================================================#
+# Retrieve previous images
+#==================================================================#
+@socketio.on("get_story_image")
+@logger.catch
+def UI_2_get_story_image(data):
+    """Return the image file get_picture reports for data['action_id'] as base64 text, or None if there is none."""
+    (filename, prompt) = koboldai_vars.actions.get_picture(data['action_id'])
+    logger.debug("Image for action {}: {}".format(data['action_id'], filename))
+    if filename is not None:
+        with open(filename, "rb") as image_file:
+            return base64.b64encode(image_file.read()).decode("utf-8")
+
 #@logger.catch
 def get_items_locations_from_text(text):
     # load model and tokenizer
diff --git a/static/koboldai.js b/static/koboldai.js
index fa0180dc..3762be6c 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -149,6 +149,7 @@ const context_menu_actions = {
 		{label: "Add to World Info Entry", icon: "auto_stories", enabledOn: "SELECTION", click: push_selection_to_world_info},
 		{label: "Add as Bias", icon: "insights", enabledOn: "SELECTION", click: push_selection_to_phrase_bias},
 		{label: "Retry from here", icon: "refresh", enabledOn: "CARET", click: retry_from_here},
+		{label: "Generate image for here", icon: "image", enabledOn: "CARET", click: generate_image},
 		null,
 		{label: "Take Screenshot", icon: "screenshot_monitor", enabledOn: "SELECTION", click: screenshot_selection},
 		// Not implemented! See view_selection_probabiltiies
@@ -602,7 +603,6 @@ function create_options(action) {
 
 function process_actions_data(data) {
 	start_time = Date.now();
-	console.log(data);
 	if (Array.isArray(data.value)) {
 		actions = data.value;
 	} else {
@@ -669,7 +669,6 @@ function set_audio_status(action) {
 		console.log("disabling status");
 		document.getElementById("audio_gen_status_"+action.id).setAttribute("status", -1);
 	}
-	console.log("Setting " + action.id + " to " + action.action.audio_gen);
 }
 
 function parseChatMessages(text) {
@@ -727,9 +726,9 @@ function do_story_text_updates(action) {
 			item.classList.add("rawtext");
 			item.setAttribute("chunk", action.id);
 			item.setAttribute("tabindex", parseInt(action.id)+1);
-			//item.addEventListener("focus", (event) => {
-			//	set_edit(event.target);
-			//});
+			item.addEventListener("focus", (event) => {
+				set_image_action(action.id);
+			});
 			
 			//need to find the closest element
 			closest_element = document.getElementById("story_prompt");
@@ -3467,7 +3466,7 @@ function fix_dirty_game_text() {
 
 	if (dirty_chunks.includes("game_text")) {
 		dirty_chunks = dirty_chunks.filter(item => item != "game_text");
-		console.log("Firing Fix messed up text");
+		//console.log("Firing Fix messed up text");
 		//Fixing text outside of chunks
 		for (node of game_text.childNodes) {
 			if ((!(node instanceof HTMLElement) || !node.hasAttribute("chunk")) && (node.textContent.trim() != "")) {
@@ -3862,6 +3861,29 @@ function tts_playing() {
 	}
 }
 
+function set_image_action(action_id) {
+	// Ask the server for the image tied to this action; change_image receives the reply.
+	socket.emit("get_story_image", {action_id: action_id}, change_image);
+}
+
+function change_image(data) {
+	// Ack callback for "get_story_image": drop the shown action image and display the returned one, if any.
+	const image_area = document.getElementById("action image");
+	let maybeImage = image_area.getElementsByClassName("action_image")[0];
+	if (maybeImage) maybeImage.remove();
+
+	$el("#image-loading").classList.add("hidden");
+
+	if (data != undefined) {
+		var image = new Image();
+		image.src = 'data:image/png;base64,'+data;
+		image.classList.add("action_image");
+		image.setAttribute("context-menu", "generated-image");
+		image.addEventListener("click", imgGenView);
+		image_area.appendChild(image);
+	}
+}
+
 function view_selection_probabilities() {
 	// Not quite sure how this should work yet. Probabilities are obviously on
 	// the token level, which we have no UI representation of. There are other
@@ -7089,6 +7111,22 @@ $el("#generate-image-button").addEventListener("click", function() {
 	socket.emit("generate_image", {});
 });
 
+function generate_image() {
+	// Context-menu handler: request an image for the chunk being edited (-1 stands for the prompt).
+	let chunk = null;
+	for (const element of document.getElementsByClassName("editing")) {
+		if (element.id == 'story_prompt') {
+			chunk = -1;
+		} else {
+			chunk = parseInt(element.id.split(" ").at(-1));
+		}
+	}
+	if (chunk != null) {
+		socket.emit("generate_image", {action_id: chunk});
+	}
+	
+}
+
 /* -- Shiny New Chat -- */
 function addMessage(author, content, actionId, afterMsgEl=null, time=null) {
 	if (!time) time = Number(new Date());
diff --git a/templates/index_new.html b/templates/index_new.html
index 920471ec..ced69636 100644
--- a/templates/index_new.html
+++ b/templates/index_new.html
@@ -54,7 +54,7 @@
 			</div>
 
 			<div class="gametext" id="Selected Text" contenteditable="false" tabindex="0" onkeyup="return set_edit(event);">
-				<span id="story_prompt" class="var_sync_story_prompt var_sync_alt_story_prompt_in_ai rawtext" chunk="-1"></span></div><!--don't move the /div down or it'll cause odd spacing issues in the UI--->
+				<span id="story_prompt" class="var_sync_story_prompt var_sync_alt_story_prompt_in_ai rawtext" chunk="-1" onfocus='set_image_action(-1);'></span></div><!--don't move the /div down or it'll cause odd spacing issues in the UI--->
 		</div>
 
 		<!------------ Sequences --------------------->

From bab52fb03503e81d07f01be7745e064b620488ba Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Sat, 14 Oct 2023 11:32:20 -0400
Subject: [PATCH 19/20] Removed tortoise tts install script

---
 install_tortiose_tts.bat | 50 ----------------------------------------
 install_tortiose_tts.sh  |  4 ----
 2 files changed, 54 deletions(-)
 delete mode 100644 install_tortiose_tts.bat
 delete mode 100755 install_tortiose_tts.sh

diff --git a/install_tortiose_tts.bat b/install_tortiose_tts.bat
deleted file mode 100644
index 3baf7583..00000000
--- a/install_tortiose_tts.bat
+++ /dev/null
@@ -1,50 +0,0 @@
-@echo off
-cd /D %~dp0
-
-:Isolation
-call conda deactivate 2>NUL
-set Path=%windir%\system32;%windir%;C:\Windows\System32\Wbem;%windir%\System32\WindowsPowerShell\v1.0\;%windir%\System32\OpenSSH\
-SET CONDA_SHLVL=
-SET PYTHONNOUSERSITE=1
-SET PYTHONPATH=
-
-rmdir /S /Q flask_session 2>NUL
-
-TITLE KoboldAI - Server
-SET /P M=<loader.settings
-IF %M%==1 GOTO drivemap
-IF %M%==2 GOTO subfolder
-IF %M%==3 GOTO drivemap_B
-
-:subfolder
-ECHO Runtime launching in subfolder mode
-call miniconda3\condabin\activate
-pip install git+https://github.com/neonbjb/tortoise-tts progressbar inflect librosa rotary-embedding-torch unidecode lazy_loader llvmlite numba joblib decorator audioread msgpack pooch scikit-learn soundfile soxr platformdirs threadpoolctl pydantic-core annotated-types pydantic --no-dependencies
-pip install torchaudio --index-url https://download.pytorch.org/whl/cu118
-cmd /k
-pause
-exit
-
-:drivemap
-ECHO Runtime launching in K: drive mode
-subst /D K: >nul
-subst K: miniconda3 >nul
-call K:\python\condabin\activate
-pip install git+https://github.com/neonbjb/tortoise-tts progressbar inflect librosa rotary-embedding-torch unidecode lazy_loader llvmlite numba joblib decorator audioread msgpack pooch scikit-learn soundfile soxr platformdirs threadpoolctl pydantic-core annotated-types pydantic --no-dependencies
-pip install torchaudio --index-url https://download.pytorch.org/whl/cu118
-pip install -r requirements.txt --no-dependencies
-cmd /k
-pause
-exit
-
-:drivemap_B
-ECHO Runtime launching in B: drive mode
-subst /D B: >nul
-subst B: miniconda3 >nul
-call B:\python\condabin\activate
-pip install git+https://github.com/neonbjb/tortoise-tts progressbar inflect librosa rotary-embedding-torch unidecode lazy_loader llvmlite numba joblib decorator audioread msgpack pooch scikit-learn soundfile soxr platformdirs threadpoolctl pydantic-core annotated-types pydantic --no-dependencies
-pip install torchaudio --index-url https://download.pytorch.org/whl/cu118
-pip install -r requirements.txt --no-dependencies
-cmd /k
-pause
-exit
\ No newline at end of file
diff --git a/install_tortiose_tts.sh b/install_tortiose_tts.sh
deleted file mode 100755
index 1a978ab4..00000000
--- a/install_tortiose_tts.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-bin/micromamba run -r runtime -n koboldai pip install git+https://github.com/neonbjb/tortoise-tts OmegaConf deepspeed
-bin/micromamba run -r runtime -n koboldai pip install torchaudio --index-url https://download.pytorch.org/whl/cu118
-bin/micromamba run -r runtime -n koboldai pip install -r requirements.txt --no-dependencies

From 27eb7165afc145986d0e900012b60673b3e20759 Mon Sep 17 00:00:00 2001
From: ebolam <ebolam@gmail.com>
Date: Sat, 14 Oct 2023 11:35:38 -0400
Subject: [PATCH 20/20] Fix for requirements.txt

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 8d6ac290..eeaf916b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -50,4 +50,4 @@ windows-curses; sys_platform == 'win32'
 pynvml
 flash_attn==2.3.0
 xformers==0.0.21
-exllamav2==0.0.4omegaconf
+omegaconf