From 94e9f9bd4233b6510f4ff45c2cd291a9476957ce Mon Sep 17 00:00:00 2001 From: Stepan Zuev Date: Fri, 5 Apr 2024 04:40:57 +0300 Subject: [PATCH] README update, gradio_app.ipynb update, debug print removed --- README.md | 5 +++ gradio_app.ipynb | 88 +++++++++--------------------------------------- gradio_app.py | 4 --- 3 files changed, 21 insertions(+), 76 deletions(-) diff --git a/README.md b/README.md index 2a5e247..41f1654 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,11 @@ Checkout [`inference_speech_editing.ipynb`](./inference_speech_editing.ipynb) an ## Gradio After environment setup install additional dependencies: ```bash +apt-get install -y espeak espeak-data libespeak1 libespeak-dev +apt-get install -y festival* +apt-get install -y build-essential +apt-get install -y flac libasound2-dev libsndfile1-dev vorbis-tools +apt-get install -y libxml2-dev libxslt-dev zlib1g-dev pip install -r gradio_requirements.txt ``` diff --git a/gradio_app.ipynb b/gradio_app.ipynb index 0d3f946..7b13660 100644 --- a/gradio_app.ipynb +++ b/gradio_app.ipynb @@ -8,84 +8,28 @@ "### Only do the below if you are using docker" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "270aa2cc", - "metadata": {}, - "outputs": [], - "source": [ - "# install OS deps\n", - "!sudo apt-get update && sudo apt-get install -y \\\n", - " git-core \\\n", - " ffmpeg \\\n", - " espeak-ng" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8ba5f452", - "metadata": {}, - "outputs": [], - "source": [ - "# Update and setup Conda voicecraft environment\n", - "!conda update -y -n base -c conda-forge conda\n", - "!conda create -y -n voicecraft python=3.9.16 && \\\n", - " conda init bash" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4ef2935c", - "metadata": {}, - "outputs": [], - "source": [ - "# install conda and pip stuff in the activated conda above context\n", - "!echo -e \"Grab a cup a coffee and a slice of pizza...\\n\\n\"\n", - "\n", - "# make sure $HOME and $USER are setup so this will source the conda environment\n", - "!source ~/.bashrc && \\\n", - " conda activate voicecraft && \\\n", - " conda install -y -c conda-forge montreal-forced-aligner=2.2.17 openfst=1.8.2 kaldi=5.5.1068 && \\\n", - " pip install torch==2.0.1 && \\\n", - " pip install tensorboard==2.16.2 && \\\n", - " pip install phonemizer==3.2.1 && \\\n", - " pip install torchaudio==2.0.2 && \\\n", - " pip install datasets==2.16.0 && \\\n", - " pip install torchmetrics==0.11.1\n", - "\n", - "# do this one last otherwise you'll get an error about torch compiler missing due to xformer mismatch\n", - "!source ~/.bashrc && \\\n", - " conda activate voicecraft && \\\n", - " pip install -e git+https://github.com/facebookresearch/audiocraft.git@c5157b5bf14bf83449c17ea1eeb66c19fb4bc7f0#egg=audiocraft" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2fca57eb", - "metadata": {}, - "outputs": [], - "source": [ - "# okay setup the conda environment such that jupyter notebook can find the kernel\n", - "!source ~/.bashrc && \\\n", - " conda activate voicecraft && \\\n", - " conda install -y -n voicecraft ipykernel --update-deps --force-reinstall\n", - "\n", - "# installs the Jupyter kernel into /home/myusername/.local/share/jupyter/kernels/voicecraft\n", - "!source ~/.bashrc && \\\n", - " conda activate voicecraft && \\\n", - " python3 -m ipykernel install --user --name=voicecraft" - ] - }, { "cell_type": "code", "execution_count": null, "id": "961faa43", "metadata": {}, "outputs": [], + "source": [ + "!source ~/.bashrc && \\\n", + " apt-get update && \\\n", + " apt-get install -y espeak espeak-data libespeak1 libespeak-dev && \\\n", + " apt-get install -y festival* && \\\n", + " apt-get install -y build-essential && \\\n", + " apt-get install -y flac libasound2-dev libsndfile1-dev vorbis-tools && \\\n", + " apt-get install -y libxml2-dev libxslt-dev zlib1g-dev" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "598d75cf", + "metadata": {}, + "outputs": [], "source": [ "!source ~/.bashrc && \\\n", " conda activate voicecraft && \\\n", diff --git a/gradio_app.py b/gradio_app.py index 4321a11..5e349fe 100644 --- a/gradio_app.py +++ b/gradio_app.py @@ -75,9 +75,6 @@ class WhisperxModel: def load_models(whisper_backend_name, whisper_model_name, alignment_model_name, voicecraft_model_name): global transcribe_model, align_model, voicecraft_model - os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" - os.environ["CUDA_VISIBLE_DEVICES"] = "0" - if alignment_model_name is not None: align_model = WhisperxAlignModel() @@ -178,7 +175,6 @@ def align(seed, transcript, audio_path): } for fragment in fragments["fragments"]] segments = align_model.align(segments, audio_path) state = get_transcribe_state(segments) - print(state) return [ state["transcript_with_start_time"], state["transcript_with_end_time"],