Added functional audio generation (requires ffmpeg installed or ffmpeg.exe in koboldai directory)

Added conversion of v1 stories to the v2 save format when a v1 story is loaded from disk
This commit is contained in:
ebolam
2022-12-05 20:52:38 -05:00
parent f77f6f7fba
commit 1f62f154a5
6 changed files with 86 additions and 20 deletions

View File

@@ -7251,18 +7251,20 @@ def loadRequest(loadpath, filename=None):
if(isinstance(loadpath, str)):
with open(loadpath, "r") as file:
js = json.load(file)
from_file=loadpath
if(filename is None):
filename = path.basename(loadpath)
else:
js = loadpath
if(filename is None):
filename = "untitled.json"
from_file=None
js['v1_loadpath'] = loadpath
js['v1_filename'] = filename
logger.debug("Loading JSON data took {}s".format(time.time()-start_time))
loadJSON(js)
loadJSON(js, from_file=from_file)
def loadJSON(json_text_or_dict):
def loadJSON(json_text_or_dict, from_file=None):
logger.debug("Loading JSON Story")
logger.debug("Called from {}".format(inspect.stack()[1].function))
start_time = time.time()
@@ -7275,13 +7277,13 @@ def loadJSON(json_text_or_dict):
if json_data['file_version'] == 2:
load_story_v2(json_data)
else:
load_story_v1(json_data)
load_story_v1(json_data, from_file=from_file)
else:
load_story_v1(json_data)
load_story_v1(json_data, from_file=from_file)
logger.debug("Calcing AI Text from Story Load")
ignore = koboldai_vars.calc_ai_text()
def load_story_v1(js):
def load_story_v1(js, from_file=None):
logger.debug("Loading V1 Story")
logger.debug("Called from {}".format(inspect.stack()[1].function))
loadpath = js['v1_loadpath'] if 'v1_loadpath' in js else koboldai_vars.savedir
@@ -7402,6 +7404,12 @@ def load_story_v1(js):
print("{0}Story loaded from {1}!{2}".format(colors.GREEN, filename, colors.END))
send_debug()
if from_file is not None:
#Save the file so we get a new V2 format, then move the save file into the proper directory
koboldai_vars.save_story()
shutil.move(from_file, koboldai_vars.save_paths.story.replace("story.json", "v1_file.json"))
def load_story_v2(js):
logger.debug("Loading V2 Story")
@@ -9881,7 +9889,7 @@ def UI_2_test_match():
@app.route("/audio")
@logger.catch
def UI_2_audio():
action_id = int(request.args['id']) if 'id' in request.args else len(koboldai_vars.actions)
action_id = int(request.args['id']) if 'id' in request.args else koboldai_vars.actions.action_count
filename = os.path.join(koboldai_vars.save_paths.generated_audio, f"{action_id}.ogg")
if not os.path.exists(filename):
koboldai_vars.actions.gen_audio(action_id)

View File

@@ -1058,6 +1058,7 @@ class story_settings(settings):
self.assign_world_info_to_actions(action_id=-1, wuid=None)
process_variable_changes(self.socketio, self.__class__.__name__.replace("_settings", ""), 'prompt_wi_highlighted_text', self.prompt_wi_highlighted_text, None)
ignore = self.koboldai_vars.calc_ai_text()
self.actions.gen_audio(action_id=-1)
#Because we have separate variables for action types, this syncs them
elif name == 'storymode':
@@ -1871,7 +1872,10 @@ class KoboldStoryRegister(object):
filename = os.path.join(self.koboldai_vars.save_paths.generated_audio, f"{action_id}.ogg")
if overwrite or not os.path.exists(filename):
self.make_audio_queue.put((self.actions[action_id]['Selected Text'], filename))
if action_id == -1:
self.make_audio_queue.put((self.koboldai_vars.prompt, filename))
else:
self.make_audio_queue.put((self.actions[action_id]['Selected Text'], filename))
if self.make_audio_thread is None or not self.make_audio_thread.is_alive():
self.make_audio_thread = threading.Thread(target=self.create_wave, args=(self.tts_model, self.make_audio_queue))
self.make_audio_thread.start()
@@ -1894,7 +1898,7 @@ class KoboldStoryRegister(object):
def gen_all_audio(self, overwrite=False):
if self.story_settings.gen_audio and self.koboldai_vars.experimental_features:
for i in reversed(list(self.actions.keys())):
for i in reversed(list(self.actions.keys())+[-1]):
self.gen_audio(i, overwrite=False)
else:
print("{} and {}".format(self.story_settings.gen_audio, self.koboldai_vars.experimental_features))

View File

@@ -1386,20 +1386,20 @@ body {
grid-area: inputrow;
z-index: 2;
display: grid;
grid-template-areas: "textarea statusbar statusbar statusbar"
"textarea submit submit submit"
"textarea back redo retry";
grid-template-columns: auto 30px 30px 30px;
grid-template-areas: "textarea tts_controls statusbar statusbar statusbar"
"textarea tts_controls submit submit submit"
"textarea tts_controls back redo retry";
grid-template-columns: auto min-content 30px 30px 30px;
grid-template-rows: 0px 60px 40px;
gap: 1px;
}
.inputrow[story_storymode="1"] {
grid-template-areas: "adventure_mode textarea statusbar statusbar statusbar"
"adventure_mode textarea submit submit submit"
"adventure_mode textarea back redo retry";
grid-template-columns: 80px auto 30px 30px 30px;
grid-template-areas: "adventure_mode textarea tts_controls statusbar statusbar statusbar"
"adventure_mode textarea tts_controls submit submit submit"
"adventure_mode textarea tts_controls back redo retry";
grid-template-columns: 80px auto min-content 30px 30px 30px;
grid-template-rows: 0px 60px 40px;
gap: 1px;
@@ -1540,6 +1540,19 @@ body {
z-index: 9999;
}
/* Matches elements that carry both the tts_controls and hidden classes and
   have story_gen_audio="true"; !important lets this display value win over
   other display declarations (presumably the generic .hidden rule - confirm). */
.tts_controls.hidden[story_gen_audio="true"] {
display: inherit !important;
}
/* Places the TTS control container in the "tts_controls" named area of the
   .inputrow grid, filling the whole cell and clipping any overflow. */
.inputrow .tts_controls {
grid-area: tts_controls;
padding: 0px;
height: 100%;
width: 100%;
text-align: center;
overflow: hidden;
}
.inputrow .back {
grid-area: back;
padding: 0px;

View File

@@ -101,6 +101,8 @@ var finder_actions = [
const context_menu_actions = {
gamescreen: [
{label: "Speak", icon: "record_voice_over", enabledOn: "SELECTION", click: speak_audio},
null,
{label: "Cut", icon: "content_cut", enabledOn: "SELECTION", click: cut},
{label: "Copy", icon: "content_copy", enabledOn: "SELECTION", click: copy},
{label: "Paste", icon: "content_paste", enabledOn: "SELECTION", click: paste},
@@ -3165,6 +3167,41 @@ function retry_from_here() {
}
}
/**
 * Context-menu "Speak" handler: plays the generated audio for the story
 * chunk currently marked with the "editing" class.
 *
 * Every element carrying the "editing" class has that class removed. The
 * chunk id is -1 for the element whose id is 'story_prompt', otherwise the
 * integer after the last space in the element id. When several elements
 * are marked, the last one determines which chunk is played. Nothing is
 * played when no element is marked or the id does not end in a number.
 */
function speak_audio() {
	let chunk = null;
	// getElementsByClassName returns a *live* collection: removing the
	// "editing" class below drops the element from it mid-iteration, so
	// looping over it directly skips entries. Iterate over a snapshot.
	const editing = Array.from(document.getElementsByClassName("editing"));
	for (const element of editing) {
		if (element.id == 'story_prompt') {
			chunk = -1;
		} else {
			chunk = parseInt(element.id.split(" ").at(-1), 10);
		}
		element.classList.remove("editing");
	}

	// Do not request "/audio?id=NaN": the id is sent to the server as an
	// integer query parameter, so an unparseable id is treated as "nothing
	// selected".
	if (chunk == null || Number.isNaN(chunk)) {
		return;
	}

	// NOTE(review): this assigns to a name that is not declared in this
	// function and is not read here; kept unchanged in case other code
	// relies on it - confirm and remove if unused.
	action_count = parseInt(document.getElementById("action_count").textContent);
	//console.log(chunk);
	const reader = document.getElementById("reader");
	reader.src = "/audio?id=" + chunk;
	reader.play();
	document.getElementById("play_tts").textContent = "pause";
}
/**
 * Toggles playback of the "reader" audio element and updates the text of
 * the "play_tts" element to match: "pause" while playing, "play_arrow"
 * once paused.
 */
function play_pause_tts() {
	const reader = document.getElementById("reader");
	if (!reader.paused) {
		// Currently playing: pause and show the play icon name again.
		reader.pause();
		document.getElementById("play_tts").textContent = "play_arrow";
		return;
	}
	// Currently paused: resume and show the pause icon name.
	reader.play();
	document.getElementById("play_tts").textContent = "pause";
}
/**
 * Stops text-to-speech playback by pointing the "reader" audio element
 * back at the default "/audio" source and restoring the "play_arrow" text
 * on the "play_tts" element.
 */
function stop_tts() {
	const reader = document.getElementById("reader");
	// The source is cleared before being reassigned; presumably this makes
	// the element drop the current stream before loading "/audio" - confirm.
	reader.src = "";
	reader.src = "/audio";
	const play_icon = document.getElementById("play_tts");
	play_icon.textContent = "play_arrow";
}
function view_selection_probabilities() {
// Not quite sure how this should work yet. Probabilities are obviously on
// the token level, which we have no UI representation of. There are other

View File

@@ -98,13 +98,17 @@
<div><span class="var_sync_model_horde_wait_time"></span> sec left</div>
</div>
</div><br>
<span class="tts_controls hidden var_sync_alt_story_gen_audio">
<button type="button" class="btn action_button" style="width: 30px; padding: 0px;" onclick='play_pause_tts()' aria-label="play"><span id="play_tts" class="material-icons-outlined" style="font-size: 1.4em;">play_arrow</span></button>
<button type="button" class="btn action_button" style="width: 30px; padding: 0px;" onclick='stop_tts()' aria-label="play"><span id="stop_tts" class="material-icons-outlined" style="font-size: 1.4em;">stop</span></button>
</span>
<button type="button" class="btn action_button submit var_sync_alt_system_aibusy" system_aibusy=False id="btnsubmit"
onclick="socket.emit('submit', {'data': document.getElementById('input_text').value, 'theme': document.getElementById('themetext').value});document.getElementById('input_text').value = '';document.getElementById('themetext').value = '';"
>Submit</button>
<button type="button" class="btn action_button submited var_sync_alt_system_aibusy" system_aibusy=False id="btnsent"><img id="thinking" src="static/thinking.gif" class="force_center" onclick="socket.emit('abort','');"></button>
<button type="button" class="btn action_button back" onclick="socket.emit('back', {});" aria-label="undo"><span class="oi" data-glyph="action-undo"></span></button>
<button type="button" class="btn action_button redo" onclick="socket.emit('redo', {});" aria-label="redo"><span class="oi" data-glyph="action-redo"></span></button>
<button type="button" class="btn action_button retry" onclick="socket.emit('retry', {});" aria-label="retry"><span class="oi" data-glyph="loop-circular"></span></button>
<button type="button" class="btn action_button back" onclick="socket.emit('back', {});" aria-label="undo"><span class="material-icons-outlined" style="font-size: 1.4em;">replay</span></button>
<button type="button" class="btn action_button redo" onclick="socket.emit('redo', {});" aria-label="redo"><span class="material-icons-outlined" style="font-size: 1.4em;">arrow_forward</span></button>
<button type="button" class="btn action_button retry" onclick="socket.emit('retry', {});" aria-label="retry"><span class="material-icons-outlined" style="font-size: 1.4em;">autorenew</span></button>
</div>
</div>

View File

@@ -84,7 +84,7 @@
<hr/>
</div>
<div id="tts" class="var_sync_alt_system_experimental_features">
<audio controls id="reader" preload=none>
<audio id="reader" preload=none src="/audio" />
</div>
<div class="setting_tile_area">
{% with menu='Home' %}