mirror of
https://github.com/jasonppy/VoiceCraft.git
synced 2025-02-20 05:30:45 +01:00
global models
This commit is contained in:
parent
74fa65979d
commit
f9fed26b15
@ -10,8 +10,12 @@ import os
|
|||||||
import io
|
import io
|
||||||
|
|
||||||
|
|
||||||
|
whisper_model, voicecraft_model = None, None
|
||||||
|
|
||||||
|
|
||||||
def load_models(whisper_model_choice, voicecraft_model_choice):
|
def load_models(whisper_model_choice, voicecraft_model_choice):
|
||||||
whisper_model, voicecraft_model = None, None
|
global whisper_model, voicecraft_model
|
||||||
|
|
||||||
if whisper_model_choice is not None:
|
if whisper_model_choice is not None:
|
||||||
import whisper
|
import whisper
|
||||||
from whisper.tokenizer import get_tokenizer
|
from whisper.tokenizer import get_tokenizer
|
||||||
@ -46,14 +50,10 @@ def load_models(whisper_model_choice, voicecraft_model_choice):
|
|||||||
"audio_tokenizer": AudioTokenizer(signature=encodec_fn)
|
"audio_tokenizer": AudioTokenizer(signature=encodec_fn)
|
||||||
}
|
}
|
||||||
|
|
||||||
return [
|
return gr.Audio(interactive=True)
|
||||||
whisper_model,
|
|
||||||
voicecraft_model,
|
|
||||||
gr.Audio(interactive=True),
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def transcribe(whisper_model, audio_path):
|
def transcribe(audio_path):
|
||||||
if whisper_model is None:
|
if whisper_model is None:
|
||||||
raise gr.Error("Whisper model not loaded")
|
raise gr.Error("Whisper model not loaded")
|
||||||
|
|
||||||
@ -87,7 +87,7 @@ def get_output_audio(audio_tensors, codec_audio_sr):
|
|||||||
return buffer.read()
|
return buffer.read()
|
||||||
|
|
||||||
|
|
||||||
def run(voicecraft_model, left_margin, right_margin, codec_audio_sr, codec_sr, top_k, top_p, temperature,
|
def run(left_margin, right_margin, codec_audio_sr, codec_sr, top_k, top_p, temperature,
|
||||||
stop_repetition, sample_batch_size, kvcache, silence_tokens,
|
stop_repetition, sample_batch_size, kvcache, silence_tokens,
|
||||||
audio_path, word_info, transcript, smart_transcript,
|
audio_path, word_info, transcript, smart_transcript,
|
||||||
mode, prompt_end_time, edit_start_time, edit_end_time,
|
mode, prompt_end_time, edit_start_time, edit_end_time,
|
||||||
@ -371,8 +371,6 @@ with gr.Blocks() as app:
|
|||||||
silence_tokens = gr.Textbox(label="silence tokens", value="[1388,1898,131]")
|
silence_tokens = gr.Textbox(label="silence tokens", value="[1388,1898,131]")
|
||||||
|
|
||||||
|
|
||||||
whisper_model = gr.State()
|
|
||||||
voicecraft_model = gr.State()
|
|
||||||
audio_tensors = gr.State()
|
audio_tensors = gr.State()
|
||||||
word_info = gr.State()
|
word_info = gr.State()
|
||||||
|
|
||||||
@ -389,13 +387,13 @@ with gr.Blocks() as app:
|
|||||||
|
|
||||||
load_models_btn.click(fn=load_models,
|
load_models_btn.click(fn=load_models,
|
||||||
inputs=[whisper_model_choice, voicecraft_model_choice],
|
inputs=[whisper_model_choice, voicecraft_model_choice],
|
||||||
outputs=[whisper_model, voicecraft_model, input_audio])
|
outputs=[input_audio])
|
||||||
|
|
||||||
input_audio.change(fn=update_input_audio,
|
input_audio.change(fn=update_input_audio,
|
||||||
inputs=[input_audio],
|
inputs=[input_audio],
|
||||||
outputs=[prompt_end_time, edit_start_time, edit_end_time])
|
outputs=[prompt_end_time, edit_start_time, edit_end_time])
|
||||||
transcribe_btn.click(fn=transcribe,
|
transcribe_btn.click(fn=transcribe,
|
||||||
inputs=[whisper_model, input_audio],
|
inputs=[input_audio],
|
||||||
outputs=[original_transcript, transcript_with_start_time, transcript_with_end_time, edit_from_word, edit_to_word, word_info])
|
outputs=[original_transcript, transcript_with_start_time, transcript_with_end_time, edit_from_word, edit_to_word, word_info])
|
||||||
|
|
||||||
mode.change(fn=change_mode,
|
mode.change(fn=change_mode,
|
||||||
@ -404,7 +402,7 @@ with gr.Blocks() as app:
|
|||||||
|
|
||||||
run_btn.click(fn=run,
|
run_btn.click(fn=run,
|
||||||
inputs=[
|
inputs=[
|
||||||
voicecraft_model, left_margin, right_margin,
|
left_margin, right_margin,
|
||||||
codec_audio_sr, codec_sr,
|
codec_audio_sr, codec_sr,
|
||||||
top_k, top_p, temperature,
|
top_k, top_p, temperature,
|
||||||
stop_repetition, sample_batch_size,
|
stop_repetition, sample_batch_size,
|
||||||
@ -420,7 +418,7 @@ with gr.Blocks() as app:
|
|||||||
outputs=[sentence_audio])
|
outputs=[sentence_audio])
|
||||||
rerun_btn.click(fn=run,
|
rerun_btn.click(fn=run,
|
||||||
inputs=[
|
inputs=[
|
||||||
voicecraft_model, left_margin, right_margin,
|
left_margin, right_margin,
|
||||||
codec_audio_sr, codec_sr,
|
codec_audio_sr, codec_sr,
|
||||||
top_k, top_p, temperature,
|
top_k, top_p, temperature,
|
||||||
stop_repetition, sample_batch_size,
|
stop_repetition, sample_batch_size,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user