mirror of
https://github.com/jasonppy/VoiceCraft.git
synced 2025-01-19 12:29:47 +01:00
README update, gradio_app.ipynb update, debug print removed
This commit is contained in:
parent
bbe3437b8d
commit
94e9f9bd42
@ -96,6 +96,11 @@ Checkout [`inference_speech_editing.ipynb`](./inference_speech_editing.ipynb) an
|
||||
## Gradio
|
||||
After environment setup, install the additional dependencies:
|
||||
```bash
|
||||
apt-get install -y espeak espeak-data libespeak1 libespeak-dev
|
||||
apt-get install -y festival*
|
||||
apt-get install -y build-essential
|
||||
apt-get install -y flac libasound2-dev libsndfile1-dev vorbis-tools
|
||||
apt-get install -y libxml2-dev libxslt-dev zlib1g-dev
|
||||
pip install -r gradio_requirements.txt
|
||||
```
|
||||
|
||||
|
@ -8,84 +8,28 @@
|
||||
"### Only do the below if you are using docker"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "270aa2cc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# install OS deps\n",
|
||||
"!sudo apt-get update && sudo apt-get install -y \\\n",
|
||||
" git-core \\\n",
|
||||
" ffmpeg \\\n",
|
||||
" espeak-ng"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8ba5f452",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Update and setup Conda voicecraft environment\n",
|
||||
"!conda update -y -n base -c conda-forge conda\n",
|
||||
"!conda create -y -n voicecraft python=3.9.16 && \\\n",
|
||||
" conda init bash"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4ef2935c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# install conda and pip stuff in the activated conda above context\n",
|
||||
"!echo -e \"Grab a cup a coffee and a slice of pizza...\\n\\n\"\n",
|
||||
"\n",
|
||||
"# make sure $HOME and $USER are setup so this will source the conda environment\n",
|
||||
"!source ~/.bashrc && \\\n",
|
||||
" conda activate voicecraft && \\\n",
|
||||
" conda install -y -c conda-forge montreal-forced-aligner=2.2.17 openfst=1.8.2 kaldi=5.5.1068 && \\\n",
|
||||
" pip install torch==2.0.1 && \\\n",
|
||||
" pip install tensorboard==2.16.2 && \\\n",
|
||||
" pip install phonemizer==3.2.1 && \\\n",
|
||||
" pip install torchaudio==2.0.2 && \\\n",
|
||||
" pip install datasets==2.16.0 && \\\n",
|
||||
" pip install torchmetrics==0.11.1\n",
|
||||
"\n",
|
||||
"# do this one last otherwise you'll get an error about torch compiler missing due to xformer mismatch\n",
|
||||
"!source ~/.bashrc && \\\n",
|
||||
" conda activate voicecraft && \\\n",
|
||||
" pip install -e git+https://github.com/facebookresearch/audiocraft.git@c5157b5bf14bf83449c17ea1eeb66c19fb4bc7f0#egg=audiocraft"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2fca57eb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# okay setup the conda environment such that jupyter notebook can find the kernel\n",
|
||||
"!source ~/.bashrc && \\\n",
|
||||
" conda activate voicecraft && \\\n",
|
||||
" conda install -y -n voicecraft ipykernel --update-deps --force-reinstall\n",
|
||||
"\n",
|
||||
"# installs the Jupyter kernel into /home/myusername/.local/share/jupyter/kernels/voicecraft\n",
|
||||
"!source ~/.bashrc && \\\n",
|
||||
" conda activate voicecraft && \\\n",
|
||||
" python3 -m ipykernel install --user --name=voicecraft"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "961faa43",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!source ~/.bashrc && \\\n",
|
||||
" apt-get update && \\\n",
|
||||
" apt-get install -y espeak espeak-data libespeak1 libespeak-dev && \\\n",
|
||||
" apt-get install -y festival* && \\\n",
|
||||
" apt-get install -y build-essential && \\\n",
|
||||
" apt-get install -y flac libasound2-dev libsndfile1-dev vorbis-tools && \\\n",
|
||||
" apt-get install -y libxml2-dev libxslt-dev zlib1g-dev"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "598d75cf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!source ~/.bashrc && \\\n",
|
||||
" conda activate voicecraft && \\\n",
|
||||
|
@ -75,9 +75,6 @@ class WhisperxModel:
|
||||
def load_models(whisper_backend_name, whisper_model_name, alignment_model_name, voicecraft_model_name):
|
||||
global transcribe_model, align_model, voicecraft_model
|
||||
|
||||
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
|
||||
|
||||
if alignment_model_name is not None:
|
||||
align_model = WhisperxAlignModel()
|
||||
|
||||
@ -178,7 +175,6 @@ def align(seed, transcript, audio_path):
|
||||
} for fragment in fragments["fragments"]]
|
||||
segments = align_model.align(segments, audio_path)
|
||||
state = get_transcribe_state(segments)
|
||||
print(state)
|
||||
|
||||
return [
|
||||
state["transcript_with_start_time"], state["transcript_with_end_time"],
|
||||
|
Loading…
Reference in New Issue
Block a user