revised env setup, random seed effective

2025-06-05 21:49:11 +02:00 · 2024-03-31 13:50:20 -07:00
parent 2f78e8d435
commit 991b1fe3bb
3 changed files with 35 additions and 10 deletions
--- a/inference_tts.ipynb
+++ b/inference_tts.ipynb
@@ -122,11 +122,13 @@
    "\n",
    "import torch\n",
    "import torchaudio\n",
+    "import numpy as np\n",
+    "import random\n",
    "\n",
    "from data.tokenizer import (\n",
    "    AudioTokenizer,\n",
    "    TextTokenizer,\n",
-    ")"
+    ")\n"
   ]
  },
  {
@@ -241,6 +243,16 @@
    "sample_batch_size = 4 # NOTE: if the if there are long silence or unnaturally strecthed words, increase sample_batch_size to 5 or higher. What this will do to the model is that the model will run sample_batch_size examples of the same audio, and pick the one that's the shortest. So if the speech rate of the generated is too fast change it to a smaller number.\n",
    "seed = 1 # change seed if you are still unhappy with the result\n",
    "\n",
+    "def seed_everything(seed):\n",
+    "    os.environ['PYTHONHASHSEED'] = str(seed)\n",
+    "    random.seed(seed)\n",
+    "    np.random.seed(seed)\n",
+    "    torch.manual_seed(seed)\n",
+    "    torch.cuda.manual_seed(seed)\n",
+    "    torch.backends.cudnn.benchmark = False\n",
+    "    torch.backends.cudnn.deterministic = True\n",
+    "seed_everything(seed)\n",
+    "\n",
    "decode_config = {'top_k': top_k, 'top_p': top_p, 'temperature': temperature, 'stop_repetition': stop_repetition, 'kvcache': kvcache, \"codec_audio_sr\": codec_audio_sr, \"codec_sr\": codec_sr, \"silence_tokens\": silence_tokens, \"sample_batch_size\": sample_batch_size}\n",
    "from inference_tts_scale import inference_one_sample\n",
    "concated_audio, gen_audio = inference_one_sample(model, ckpt[\"config\"], phn2num, text_tokenizer, audio_tokenizer, audio_fn, target_transcript, device, decode_config, prompt_end_frame)\n",
@@ -280,7 +292,7 @@
  "kernelspec": {
   "display_name": "voicecraft",
   "language": "python",
-   "name": "voicecraft"
+   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
@@ -292,7 +304,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.19"
+   "version": "3.9.18"
  }
 },
 "nbformat": 4,