README update, gradio_app.ipynb update, debug print removed
This commit is contained in:
parent
bbe3437b8d
commit
94e9f9bd42
|
@@ -96,6 +96,11 @@ Checkout [`inference_speech_editing.ipynb`](./inference_speech_editing.ipynb) an
|
||||||
## Gradio
|
## Gradio
|
||||||
After environment setup install additional dependencies:
|
After environment setup install additional dependencies:
|
||||||
```bash
|
```bash
|
||||||
|
apt-get install -y espeak espeak-data libespeak1 libespeak-dev
|
||||||
|
apt-get install -y festival*
|
||||||
|
apt-get install -y build-essential
|
||||||
|
apt-get install -y flac libasound2-dev libsndfile1-dev vorbis-tools
|
||||||
|
apt-get install -y libxml2-dev libxslt-dev zlib1g-dev
|
||||||
pip install -r gradio_requirements.txt
|
pip install -r gradio_requirements.txt
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@@ -8,84 +8,28 @@
|
||||||
"### Only do the below if you are using docker"
|
"### Only do the below if you are using docker"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "270aa2cc",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# install OS deps\n",
|
|
||||||
"!sudo apt-get update && sudo apt-get install -y \\\n",
|
|
||||||
" git-core \\\n",
|
|
||||||
" ffmpeg \\\n",
|
|
||||||
" espeak-ng"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "8ba5f452",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Update and setup Conda voicecraft environment\n",
|
|
||||||
"!conda update -y -n base -c conda-forge conda\n",
|
|
||||||
"!conda create -y -n voicecraft python=3.9.16 && \\\n",
|
|
||||||
" conda init bash"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "4ef2935c",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# install conda and pip stuff in the activated conda above context\n",
|
|
||||||
"!echo -e \"Grab a cup a coffee and a slice of pizza...\\n\\n\"\n",
|
|
||||||
"\n",
|
|
||||||
"# make sure $HOME and $USER are setup so this will source the conda environment\n",
|
|
||||||
"!source ~/.bashrc && \\\n",
|
|
||||||
" conda activate voicecraft && \\\n",
|
|
||||||
" conda install -y -c conda-forge montreal-forced-aligner=2.2.17 openfst=1.8.2 kaldi=5.5.1068 && \\\n",
|
|
||||||
" pip install torch==2.0.1 && \\\n",
|
|
||||||
" pip install tensorboard==2.16.2 && \\\n",
|
|
||||||
" pip install phonemizer==3.2.1 && \\\n",
|
|
||||||
" pip install torchaudio==2.0.2 && \\\n",
|
|
||||||
" pip install datasets==2.16.0 && \\\n",
|
|
||||||
" pip install torchmetrics==0.11.1\n",
|
|
||||||
"\n",
|
|
||||||
"# do this one last otherwise you'll get an error about torch compiler missing due to xformer mismatch\n",
|
|
||||||
"!source ~/.bashrc && \\\n",
|
|
||||||
" conda activate voicecraft && \\\n",
|
|
||||||
" pip install -e git+https://github.com/facebookresearch/audiocraft.git@c5157b5bf14bf83449c17ea1eeb66c19fb4bc7f0#egg=audiocraft"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "2fca57eb",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# okay setup the conda environment such that jupyter notebook can find the kernel\n",
|
|
||||||
"!source ~/.bashrc && \\\n",
|
|
||||||
" conda activate voicecraft && \\\n",
|
|
||||||
" conda install -y -n voicecraft ipykernel --update-deps --force-reinstall\n",
|
|
||||||
"\n",
|
|
||||||
"# installs the Jupyter kernel into /home/myusername/.local/share/jupyter/kernels/voicecraft\n",
|
|
||||||
"!source ~/.bashrc && \\\n",
|
|
||||||
" conda activate voicecraft && \\\n",
|
|
||||||
" python3 -m ipykernel install --user --name=voicecraft"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "961faa43",
|
"id": "961faa43",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"!source ~/.bashrc && \\\n",
|
||||||
|
" apt-get update && \\\n",
|
||||||
|
" apt-get install -y espeak espeak-data libespeak1 libespeak-dev && \\\n",
|
||||||
|
" apt-get install -y festival* && \\\n",
|
||||||
|
" apt-get install -y build-essential && \\\n",
|
||||||
|
" apt-get install -y flac libasound2-dev libsndfile1-dev vorbis-tools && \\\n",
|
||||||
|
" apt-get install -y libxml2-dev libxslt-dev zlib1g-dev"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "598d75cf",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"!source ~/.bashrc && \\\n",
|
"!source ~/.bashrc && \\\n",
|
||||||
" conda activate voicecraft && \\\n",
|
" conda activate voicecraft && \\\n",
|
||||||
|
|
|
@@ -75,9 +75,6 @@ class WhisperxModel:
|
||||||
def load_models(whisper_backend_name, whisper_model_name, alignment_model_name, voicecraft_model_name):
|
def load_models(whisper_backend_name, whisper_model_name, alignment_model_name, voicecraft_model_name):
|
||||||
global transcribe_model, align_model, voicecraft_model
|
global transcribe_model, align_model, voicecraft_model
|
||||||
|
|
||||||
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
|
||||||
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
|
|
||||||
|
|
||||||
if alignment_model_name is not None:
|
if alignment_model_name is not None:
|
||||||
align_model = WhisperxAlignModel()
|
align_model = WhisperxAlignModel()
|
||||||
|
|
||||||
|
@@ -178,7 +175,6 @@ def align(seed, transcript, audio_path):
|
||||||
} for fragment in fragments["fragments"]]
|
} for fragment in fragments["fragments"]]
|
||||||
segments = align_model.align(segments, audio_path)
|
segments = align_model.align(segments, audio_path)
|
||||||
state = get_transcribe_state(segments)
|
state = get_transcribe_state(segments)
|
||||||
print(state)
|
|
||||||
|
|
||||||
return [
|
return [
|
||||||
state["transcript_with_start_time"], state["transcript_with_end_time"],
|
state["transcript_with_start_time"], state["transcript_with_end_time"],
|
||||||
|
|
Loading…
Reference in New Issue