Fix for long prompt audio generation

2025-06-05 21:59:24 +02:00 · 2022-12-06 18:58:27 -05:00
parent 80a8794f63
commit fcacd53b62
2 changed files with 14 additions and 11 deletions
--- a/aiserver.py
+++ b/aiserver.py
@@ -7281,7 +7281,7 @@ def loadJSON(json_text_or_dict, from_file=None):
    logger.debug("Loading JSON data took {}s".format(time.time()-start_time))
    if "file_version" in json_data:
        if json_data['file_version'] == 2:
-            load_story_v2(json_data)
+            load_story_v2(json_data, from_file=from_file)
        else:
            load_story_v1(json_data, from_file=from_file)
    else:
@@ -7417,7 +7417,7 @@ def load_story_v1(js, from_file=None):
        shutil.move(from_file, koboldai_vars.save_paths.story.replace("story.json", "v1_file.json"))
-def load_story_v2(js):
+def load_story_v2(js, from_file=None):
    logger.debug("Loading V2 Story")
    logger.debug("Called from {}".format(inspect.stack()[1].function))
    leave_room(session['story'])
@@ -7426,6 +7426,11 @@ def load_story_v2(js):
    koboldai_vars.load_story(session['story'], js)
    if from_file is not None and os.path.basename(from_file) != "story.json":
        #Save the file so we get a new V2 format, then move the save file into the proper directory
        koboldai_vars.save_story()
        shutil.move(from_file, koboldai_vars.save_paths.story.replace("story.json", "v2_file.json"))
 #==================================================================#
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -1919,8 +1919,6 @@ class KoboldStoryRegister(object):
            if action_id is None:
                action_id = self.action_count
            logger.info("Generating audio for action {}".format(action_id))
            if self.tts_model is None:
                language = 'en'
                model_id = 'v3_en'
@@ -1948,10 +1946,11 @@ class KoboldStoryRegister(object):
        speaker = 'en_5'
        while not make_audio_queue.empty():
            (text, filename) = make_audio_queue.get()
-            if text == "":
+            logger.info("Creating audio for {}".format(os.path.basename(filename)))
-                shutil.move("data/empty_audio.ogg", filename)
+            if text.strip() == "":
                shutil.copy("data/empty_audio.ogg", filename)
            else:
-                if len(text) > 5000:
+                if len(text) > 2000:
                    text = self.sentence_re.findall(text)
                else:
                    text = [text]
@@ -1963,11 +1962,10 @@ class KoboldStoryRegister(object):
                                            #audio_path=filename)
                    channels = 2 if (audio.ndim == 2 and audio.shape[1] == 2) else 1
                    if output is None:
-                        output = np.int16(audio * 2 ** 15)
+                        output = pydub.AudioSegment(np.int16(audio * 2 ** 15).tobytes(), frame_rate=sample_rate, sample_width=2, channels=channels)
                    else:
-                        output = numpy.concatenate(output, np.int16(audio * 2 ** 15))
+                        output = output + pydub.AudioSegment(np.int16(audio * 2 ** 15).tobytes(), frame_rate=sample_rate, sample_width=2, channels=channels)
-                song = pydub.AudioSegment(output.tobytes(), frame_rate=sample_rate, sample_width=2, channels=channels)
+                output.export(filename, format="ogg", bitrate="16k")
                song.export(filename, format="ogg", bitrate="16k")
    def gen_all_audio(self, overwrite=False):
        if self.story_settings.gen_audio and self.koboldai_vars.experimental_features: