More TTS stuff. Now generates OGG files (dependencies not included in requirements yet)

This commit is contained in:
ebolam
2022-11-19 19:04:15 -05:00
parent 4b54b97080
commit a7747be519
2 changed files with 14 additions and 7 deletions

View File

@@ -9550,12 +9550,12 @@ def UI_2_test_match():
@logger.catch
def UI_2_audio():
    """Serve the OGG audio clip for one story action, generating it on demand.

    The action is selected by the optional ``id`` query argument; when it is
    absent, ``len(koboldai_vars.actions)`` is used instead. Clips live at
    ``stories/<story_id>/<action_id>.ogg``.

    Returns:
        The audio file with MIME type ``audio/ogg``; a 400 response when
        ``id`` is not an integer; a 404 response when the clip does not exist
        after generation has been requested.
    """
    if 'id' in request.args:
        try:
            action_id = int(request.args['id'])
        except ValueError:
            # A malformed id is a client error; answer it explicitly instead
            # of letting int() raise inside the view.
            return "Invalid action id", 400
    else:
        action_id = len(koboldai_vars.actions)

    filename = "stories/{}/{}.ogg".format(koboldai_vars.story_id, action_id)
    if not os.path.exists(filename):
        koboldai_vars.actions.gen_audio(action_id)

    # NOTE(review): gen_audio may hand the work to a background thread, in
    # which case the file is not guaranteed to exist yet -- confirm. Guarding
    # here avoids passing a missing path to send_file.
    if not os.path.exists(filename):
        return "Audio not available", 404

    return send_file(
        filename,
        mimetype="audio/ogg")
#==================================================================#

View File

@@ -9,6 +9,7 @@ import multiprocessing
from logger import logger
import eventlet
import torch
import numpy as np
serverstarted = False
queue = None
@@ -1681,7 +1682,7 @@ class KoboldStoryRegister(object):
#self.tts_model.to(torch.device(0)) # gpu or cpu
self.tts_model.to(torch.device("cpu")) # gpu or cpu
filename="stories/{}/{}.wav".format(self.story_settings.story_id, action_id)
filename="stories/{}/{}.ogg".format(self.story_settings.story_id, action_id)
if not os.path.exists("stories/{}".format(self.story_settings.story_id)):
os.mkdir("stories/{}".format(self.story_settings.story_id))
@@ -1692,14 +1693,20 @@ class KoboldStoryRegister(object):
self.make_audio_thread.start()
def create_wave(self, model, make_audio_queue):
    """Drain ``make_audio_queue``, synthesizing each entry to an OGG file.

    Args:
        model: Not referenced by this method; synthesis goes through
            ``self.tts_model``. Kept so existing callers keep working.
        make_audio_queue: Queue of ``(text, filename)`` tuples. Each text is
            synthesized and written to its filename.
    """
    # Imported locally: pydub is an optional dependency that is only needed
    # once audio is actually generated.
    import pydub

    sample_rate = 24000
    speaker = 'en_5'
    int16_scale = 2 ** 15

    while not make_audio_queue.empty():
        (text, filename) = make_audio_queue.get()
        audio = self.tts_model.apply_tts(text=text,
                                         speaker=speaker,
                                         sample_rate=sample_rate)
        # Treat a 2-D result with two columns as stereo, anything else as mono.
        channels = 2 if (audio.ndim == 2 and audio.shape[1] == 2) else 1

        # Scale to 16-bit PCM. A sample at +1.0 scales to 32768, which does
        # not fit in int16, so clip to the representable range before casting.
        # NOTE(review): assumes audio is float data in [-1.0, 1.0] -- confirm.
        scaled = np.asarray(audio) * int16_scale
        pcm = np.clip(scaled, -int16_scale, int16_scale - 1).astype(np.int16)

        song = pydub.AudioSegment(pcm.tobytes(),
                                  frame_rate=sample_rate,
                                  sample_width=2,  # bytes per int16 sample
                                  channels=channels)
        song.export(filename, format="ogg", bitrate="16k")
def gen_all_audio(self, overwrite=False):
if self.story_settings.gen_audio and self.koboldai_vars.experimental_features: