revised env setup, random seed effective

This commit is contained in:
jason-on-salt-a40
2024-03-31 13:50:20 -07:00
parent 2f78e8d435
commit 991b1fe3bb
3 changed files with 35 additions and 10 deletions

View File

@@ -122,11 +122,13 @@
"\n",
"import torch\n",
"import torchaudio\n",
"import numpy as np\n",
"import random\n",
"\n",
"from data.tokenizer import (\n",
" AudioTokenizer,\n",
" TextTokenizer,\n",
")"
")\n"
]
},
{
@@ -241,6 +243,16 @@
"sample_batch_size = 4 # NOTE: if there are long silences or unnaturally stretched words, increase sample_batch_size to 5 or higher. The model will then generate sample_batch_size samples of the same audio and pick the shortest one. So if the speech rate of the generated audio is too fast, change it to a smaller number.\n",
"seed = 1 # change seed if you are still unhappy with the result\n",
"\n",
"def seed_everything(seed):\n",
"    \"\"\"Seed every random number generator used by this notebook.\n",
"\n",
"    Exports the seed as PYTHONHASHSEED, seeds Python's `random`, NumPy and\n",
"    PyTorch (torch.manual_seed and torch.cuda.manual_seed), then turns cuDNN\n",
"    benchmark mode off and deterministic mode on.\n",
"\n",
"    Args:\n",
"        seed: integer seed handed to every generator.\n",
"    \"\"\"\n",
"    os.environ['PYTHONHASHSEED'] = str(seed)\n",
"    # Same seeding order as before: random, numpy, torch CPU, torch CUDA.\n",
"    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):\n",
"        seed_fn(seed)\n",
"    torch.backends.cudnn.deterministic = True\n",
"    torch.backends.cudnn.benchmark = False\n",
"seed_everything(seed)\n",
"\n",
"decode_config = {'top_k': top_k, 'top_p': top_p, 'temperature': temperature, 'stop_repetition': stop_repetition, 'kvcache': kvcache, \"codec_audio_sr\": codec_audio_sr, \"codec_sr\": codec_sr, \"silence_tokens\": silence_tokens, \"sample_batch_size\": sample_batch_size}\n",
"from inference_tts_scale import inference_one_sample\n",
"concated_audio, gen_audio = inference_one_sample(model, ckpt[\"config\"], phn2num, text_tokenizer, audio_tokenizer, audio_fn, target_transcript, device, decode_config, prompt_end_frame)\n",
@@ -280,7 +292,7 @@
"kernelspec": {
"display_name": "voicecraft",
"language": "python",
"name": "voicecraft"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -292,7 +304,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.19"
"version": "3.9.18"
}
},
"nbformat": 4,