Fix for long prompt audio generation

This commit is contained in:
ebolam
2022-12-06 18:58:27 -05:00
parent 80a8794f63
commit fcacd53b62
2 changed files with 14 additions and 11 deletions

View File

@@ -7281,7 +7281,7 @@ def loadJSON(json_text_or_dict, from_file=None):
logger.debug("Loading JSON data took {}s".format(time.time()-start_time))
if "file_version" in json_data:
if json_data['file_version'] == 2:
load_story_v2(json_data)
load_story_v2(json_data, from_file=from_file)
else:
load_story_v1(json_data, from_file=from_file)
else:
@@ -7417,7 +7417,7 @@ def load_story_v1(js, from_file=None):
shutil.move(from_file, koboldai_vars.save_paths.story.replace("story.json", "v1_file.json"))
def load_story_v2(js):
def load_story_v2(js, from_file=None):
logger.debug("Loading V2 Story")
logger.debug("Called from {}".format(inspect.stack()[1].function))
leave_room(session['story'])
@@ -7426,6 +7426,11 @@ def load_story_v2(js):
koboldai_vars.load_story(session['story'], js)
if from_file is not None and os.path.basename(from_file) != "story.json":
#Save the file so we get a new V2 format, then move the save file into the proper directory
koboldai_vars.save_story()
shutil.move(from_file, koboldai_vars.save_paths.story.replace("story.json", "v2_file.json"))
#==================================================================#

View File

@@ -1918,8 +1918,6 @@ class KoboldStoryRegister(object):
if self.story_settings.gen_audio and self.koboldai_vars.experimental_features:
if action_id is None:
action_id = self.action_count
logger.info("Generating audio for action {}".format(action_id))
if self.tts_model is None:
language = 'en'
@@ -1948,10 +1946,11 @@ class KoboldStoryRegister(object):
speaker = 'en_5'
while not make_audio_queue.empty():
(text, filename) = make_audio_queue.get()
if text == "":
shutil.move("data/empty_audio.ogg", filename)
logger.info("Creating audio for {}".format(os.path.basename(filename)))
if text.strip() == "":
shutil.copy("data/empty_audio.ogg", filename)
else:
if len(text) > 5000:
if len(text) > 2000:
text = self.sentence_re.findall(text)
else:
text = [text]
@@ -1963,11 +1962,10 @@ class KoboldStoryRegister(object):
#audio_path=filename)
channels = 2 if (audio.ndim == 2 and audio.shape[1] == 2) else 1
if output is None:
output = np.int16(audio * 2 ** 15)
output = pydub.AudioSegment(np.int16(audio * 2 ** 15).tobytes(), frame_rate=sample_rate, sample_width=2, channels=channels)
else:
output = numpy.concatenate(output, np.int16(audio * 2 ** 15))
song = pydub.AudioSegment(output.tobytes(), frame_rate=sample_rate, sample_width=2, channels=channels)
song.export(filename, format="ogg", bitrate="16k")
output = output + pydub.AudioSegment(np.int16(audio * 2 ** 15).tobytes(), frame_rate=sample_rate, sample_width=2, channels=channels)
output.export(filename, format="ogg", bitrate="16k")
def gen_all_audio(self, overwrite=False):
if self.story_settings.gen_audio and self.koboldai_vars.experimental_features: