diff --git a/README.md b/README.md index f2a1df3..a1dc4b3 100644 --- a/README.md +++ b/README.md @@ -11,18 +11,19 @@ To clone or edit an unseen voice, VoiceCraft needs only a few seconds of referen :star: 03/28/2024: Model weights are up on HuggingFace🤗 [here](https://huggingface.co/pyp1/VoiceCraft/tree/main)! ## QuickStart -For Linux only, or likely Windows Subsystem for Linux (WSL) ubuntu. +Tested on Linux and Windows and should work with any host with docker installed. ```bash # 1. clone the repo on in a directory on a drive with plenty of free space git clone git@github.com:jasonppy/VoiceCraft.git cd VoiceCraft -# 2. assumes you have docker installed with nvidia container container-toolkit +# 2. assumes you have docker installed with the nvidia container-toolkit (windows has this built into the driver) # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/1.13.5/install-guide.html # sudo apt-get install -y nvidia-container-toolkit-base || yay -Syu nvidia-container-toolkit || echo etc... # 3. Try to start an existing container otherwise create a new one passing in all GPUs -./start-jupyter.sh +./start-jupyter.sh # linux +start-jupyter.bat # windows # 4. now open a webpage on the host box to the URL shown at the bottom of: docker logs jupyter diff --git a/inference_tts.ipynb b/inference_tts.ipynb index 8ca6c01..c8d5163 100644 --- a/inference_tts.ipynb +++ b/inference_tts.ipynb @@ -6,9 +6,9 @@ "source": [ "VoiceCraft Inference Text To Speech Demo\n", "===\n", - "This will install a bunch of garbage all over so consider using a docker container to contain the cruft.\n", + "This will install a ton of dependencies all over so consider using the provided docker container start-jupyter script to keep the cruft off your dev box.\n", "\n", - "Run the next 5 cells one at a time then change the Jupyter Notebook Kernel to use the voicecraft environment." 
+ "Run the next cells one at a time up until the *STOP* and follow those instructions before continuing. You only have to do this the first time to set up the container." ] }, { @@ -71,7 +71,12 @@ "# okay setup the conda environment such that jupyter notebook can find the kernel\n", "!source ~/.bashrc && \\\n", " conda activate voicecraft && \\\n", - " conda install -y -n voicecraft ipykernel --update-deps --force-reinstall" + " conda install -y -n voicecraft ipykernel --update-deps --force-reinstall\n", + "\n", + "# installs the Jupyter kernel into /home/myusername/.local/share/jupyter/kernels/voicecraft\n", + "!source ~/.bashrc && \\\n", + " conda activate voicecraft && \\\n", + " python3 -m ipykernel install --user --name=voicecraft" ] }, { @@ -81,10 +86,11 @@ "# STOP\n", "You have to do this part manually using the mouse/keyboard and the tabs at the top.\n", "\n", + "* Refresh your browser to make sure it picks up the new kernel.\n", "* Kernel -> Change Kernel -> Select Kernel -> voicecraft\n", "* Kernel -> Restart Kernel -> Yes\n", "\n", - "Now you can run the rest of the notebook and get an audio sample output. It will download more models and such." + "Now you can run the rest of the notebook and get an audio sample output. It will automatically download more models and such. The next time you use this container, you can start from the cell below, because the dependencies remain available until you delete the docker container." ] }, { @@ -153,7 +159,11 @@ " mfa model download dictionary english_us_arpa && \\\n", " mfa model download acoustic english_us_arpa\n", "\n", - "os.system(f\". ~/.bashrc && conda activate voicecraft && mfa align -j 1 --output_format csv {temp_folder} english_us_arpa english_us_arpa {align_temp}\")\n", + "#os.system(f\". 
~/.bashrc && conda activate voicecraft && mfa align -j 1 --output_format csv {temp_folder} english_us_arpa english_us_arpa {align_temp}\")\n", + "!source ~/.bashrc && \\\n", + " conda activate voicecraft && \\\n", + " mfa align -v --clean -j 1 --output_format csv {temp_folder} \\\n", + " english_us_arpa english_us_arpa {align_temp}\n", "\n", "# if the above fails, it could be because the audio is too hard for the alignment model, increasing the beam size usually solves the issue\n", "# os.system(f\"mfa align -j 1 --output_format csv {temp_folder} english_us_arpa english_us_arpa {align_temp} --beam 1000 --retry_beam 2000\")\n", diff --git a/start-jupyter.bat b/start-jupyter.bat new file mode 100644 index 0000000..5480710 --- /dev/null +++ b/start-jupyter.bat @@ -0,0 +1,22 @@ +:: Windows Docker Context Batch File +:: No need to install nvidia toolkit as Windows driver supports docker GPUs +:: Credit to github/jay-c88 for this +:: https://github.com/jasonppy/VoiceCraft/pull/25#issuecomment-2028053878 +:: The bind mount below is quoted as a whole so a checkout path containing spaces stays a single argument +@echo off + +docker start jupyter > nul 2> nul || ^ +docker run -it ^ +-d ^ +--gpus all ^ +-p 8888:8888 ^ +--name jupyter ^ +--user root ^ +-e NB_USER="%username%" ^ +-e CHOWN_HOME=yes ^ +-e GRANT_SUDO=yes ^ +-w "/home/%username%" ^ +-v "%cd%:/home/%username%/work" ^ +jupyter/base-notebook + +pause