revised env setup, random seed effective

2025-06-05 21:49:11 +02:00 · 2024-03-31 13:50:20 -07:00
parent 2f78e8d435
commit 991b1fe3bb
3 changed files with 35 additions and 10 deletions
--- a/inference_speech_editing.ipynb
+++ b/inference_speech_editing.ipynb
@@ -30,13 +30,15 @@
    "# import libs\n",
    "import torch\n",
    "import torchaudio\n",
+    "import numpy as np\n",
+    "import random\n",
    "\n",
    "from data.tokenizer import (\n",
    "    AudioTokenizer,\n",
    "    TextTokenizer,\n",
    ")\n",
    "\n",
-    "from models import voicecraft\n"
+    "from models import voicecraft"
   ]
  },
  {
@@ -72,6 +74,15 @@
    "silence_tokens = [1388,1898,131] # if there are long silence in the generated audio, reduce the stop_repetition to 3, 2 or even 1\n",
    "stop_repetition = -1 # -1 means do not adjust prob of silence tokens. if there are long silence or unnaturally strecthed words, increase sample_batch_size to 2, 3 or even 4\n",
    "# what this will do to the model is that the model will run sample_batch_size examples of the same audio, and pick the one that's the shortest\n",
+    "def seed_everything(seed):  # seed the Python, NumPy and PyTorch RNGs and switch cuDNN to deterministic mode\n",
+    "    os.environ['PYTHONHASHSEED'] = str(seed)  # read at interpreter start-up, so this only reaches child processes\n",
+    "    random.seed(seed)\n",
+    "    np.random.seed(seed)\n",
+    "    torch.manual_seed(seed)\n",
+    "    torch.cuda.manual_seed_all(seed)  # seed every visible GPU, not just the current device\n",
+    "    torch.backends.cudnn.benchmark = False  # autotuning may pick different kernels from run to run\n",
+    "    torch.backends.cudnn.deterministic = True\n",
+    "seed_everything(seed)\n",
    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
    "\n",
    "# point to the original file or record the file\n",