From 94e9f9bd4233b6510f4ff45c2cd291a9476957ce Mon Sep 17 00:00:00 2001
From: Stepan Zuev <zuev.step4n@gmail.com>
Date: Fri, 5 Apr 2024 04:40:57 +0300
Subject: [PATCH] README update, gradio_app.ipynb update, CUDA device override and debug print removed

---
 README.md        |  5 +++
 gradio_app.ipynb | 88 +++++++++---------------------------------------
 gradio_app.py    |  4 ---
 3 files changed, 21 insertions(+), 76 deletions(-)

diff --git a/README.md b/README.md
index 2a5e247..41f1654 100644
--- a/README.md
+++ b/README.md
@@ -96,6 +96,11 @@ Checkout [`inference_speech_editing.ipynb`](./inference_speech_editing.ipynb) an
 ## Gradio
 After environment setup install additional dependencies:
 ```bash
+apt-get install -y espeak espeak-data libespeak1 libespeak-dev
+apt-get install -y 'festival*'
+apt-get install -y build-essential
+apt-get install -y flac libasound2-dev libsndfile1-dev vorbis-tools
+apt-get install -y libxml2-dev libxslt-dev zlib1g-dev
 pip install -r gradio_requirements.txt
 ```
 
diff --git a/gradio_app.ipynb b/gradio_app.ipynb
index 0d3f946..7b13660 100644
--- a/gradio_app.ipynb
+++ b/gradio_app.ipynb
@@ -8,84 +8,28 @@
     "### Only do the below if you are using docker"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "270aa2cc",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# install OS deps\n",
-    "!sudo apt-get update && sudo apt-get install -y \\\n",
-    "    git-core \\\n",
-    "    ffmpeg \\\n",
-    "    espeak-ng"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8ba5f452",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Update and setup Conda voicecraft environment\n",
-    "!conda update -y -n base -c conda-forge conda\n",
-    "!conda create -y -n voicecraft python=3.9.16 && \\\n",
-    "    conda init bash"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4ef2935c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# install conda and pip stuff in the activated conda above context\n",
-    "!echo -e \"Grab a cup a coffee and a slice of pizza...\\n\\n\"\n",
-    "\n",
-    "# make sure $HOME and $USER are setup so this will source the conda environment\n",
-    "!source ~/.bashrc && \\\n",
-    "    conda activate voicecraft && \\\n",
-    "    conda install -y -c conda-forge montreal-forced-aligner=2.2.17 openfst=1.8.2 kaldi=5.5.1068 && \\\n",
-    "    pip install torch==2.0.1 && \\\n",
-    "    pip install tensorboard==2.16.2 && \\\n",
-    "    pip install phonemizer==3.2.1 && \\\n",
-    "    pip install torchaudio==2.0.2 && \\\n",
-    "    pip install datasets==2.16.0 && \\\n",
-    "    pip install torchmetrics==0.11.1\n",
-    "\n",
-    "# do this one last otherwise you'll get an error about torch compiler missing due to xformer mismatch\n",
-    "!source ~/.bashrc && \\\n",
-    "    conda activate voicecraft && \\\n",
-    "    pip install -e git+https://github.com/facebookresearch/audiocraft.git@c5157b5bf14bf83449c17ea1eeb66c19fb4bc7f0#egg=audiocraft"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2fca57eb",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# okay setup the conda environment such that jupyter notebook can find the kernel\n",
-    "!source ~/.bashrc && \\\n",
-    "    conda activate voicecraft && \\\n",
-    "    conda install -y -n voicecraft ipykernel --update-deps --force-reinstall\n",
-    "\n",
-    "# installs the Jupyter kernel into /home/myusername/.local/share/jupyter/kernels/voicecraft\n",
-    "!source ~/.bashrc && \\\n",
-    "    conda activate voicecraft && \\\n",
-    "    python3 -m ipykernel install --user --name=voicecraft"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "961faa43",
    "metadata": {},
    "outputs": [],
+   "source": [
+    "!source ~/.bashrc && \\\n",
+    "    apt-get update && \\\n",
+    "    apt-get install -y espeak espeak-data libespeak1 libespeak-dev && \\\n",
+    "    apt-get install -y festival* && \\\n",
+    "    apt-get install -y build-essential && \\\n",
+    "    apt-get install -y flac libasound2-dev libsndfile1-dev vorbis-tools && \\\n",
+    "    apt-get install -y libxml2-dev libxslt-dev zlib1g-dev"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "598d75cf",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "!source ~/.bashrc && \\\n",
     "    conda activate voicecraft && \\\n",
diff --git a/gradio_app.py b/gradio_app.py
index 4321a11..5e349fe 100644
--- a/gradio_app.py
+++ b/gradio_app.py
@@ -75,9 +75,6 @@ class WhisperxModel:
 def load_models(whisper_backend_name, whisper_model_name, alignment_model_name, voicecraft_model_name):
     global transcribe_model, align_model, voicecraft_model
 
-    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
-    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
-
     if alignment_model_name is not None:
         align_model = WhisperxAlignModel()
 
@@ -178,7 +175,6 @@ def align(seed, transcript, audio_path):
     } for fragment in fragments["fragments"]]
     segments = align_model.align(segments, audio_path)
     state = get_transcribe_state(segments)
-    print(state)
 
     return [
         state["transcript_with_start_time"], state["transcript_with_end_time"],