better handle numbers
This commit is contained in:
parent
eb8d89f618
commit
96f6f9fc7a
|
@ -1,4 +1,6 @@
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
|
from num2words import num2words
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
import torch
|
import torch
|
||||||
import torchaudio
|
import torchaudio
|
||||||
|
@ -201,6 +203,15 @@ def get_output_audio(audio_tensors, codec_audio_sr):
|
||||||
buffer.seek(0)
|
buffer.seek(0)
|
||||||
return buffer.read()
|
return buffer.read()
|
||||||
|
|
||||||
|
def replace_numbers_with_words(sentence):
    """Replace every run of digits in *sentence* with its spelled-out form.

    Each maximal run of digits is handed to ``num2words`` as a string and
    replaced by the result, padded with one space on each side so the words
    never fuse with adjacent letters or punctuation (e.g. ``"5kg"``). The
    padding can leave leading, trailing or doubled spaces in the returned
    string.

    Only plain digit runs are recognised: a decimal such as ``3.14`` is
    treated as two separate numbers (``3`` and ``14``) with the dot kept
    between them.

    Args:
        sentence: Text that may contain digits.

    Returns:
        The text with each digit run replaced by words. A run that
        ``num2words`` fails to convert is kept as digits (still
        space-padded).
    """
    def spell_out(match):
        digits = match.group(0)
        try:
            words = num2words(digits)
        except Exception:
            # Best effort: keep the original digits if conversion fails.
            # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt
            # and SystemExit still propagate.
            words = digits
        return " " + words + " "

    # A single pass is enough: padding each run with spaces and converting it
    # in the same substitution yields the same text as padding first and then
    # matching the now space-delimited runs.
    return re.sub(r'\d+', spell_out, sentence)
|
||||||
|
|
||||||
def run(seed, left_margin, right_margin, codec_audio_sr, codec_sr, top_k, top_p, temperature,
|
def run(seed, left_margin, right_margin, codec_audio_sr, codec_sr, top_k, top_p, temperature,
|
||||||
stop_repetition, sample_batch_size, kvcache, silence_tokens,
|
stop_repetition, sample_batch_size, kvcache, silence_tokens,
|
||||||
|
@ -213,6 +224,8 @@ def run(seed, left_margin, right_margin, codec_audio_sr, codec_sr, top_k, top_p,
|
||||||
raise gr.Error("Can't use smart transcript: whisper transcript not found")
|
raise gr.Error("Can't use smart transcript: whisper transcript not found")
|
||||||
|
|
||||||
seed_everything(seed)
|
seed_everything(seed)
|
||||||
|
transcript = replace_numbers_with_words(transcript).replace(" ", " ").replace(" ", " ") # replace numbers with words, so that the phonemizer can do a better job
|
||||||
|
|
||||||
if mode == "Long TTS":
|
if mode == "Long TTS":
|
||||||
if split_text == "Newline":
|
if split_text == "Newline":
|
||||||
sentences = transcript.split('\n')
|
sentences = transcript.split('\n')
|
||||||
|
|
|
@ -4,3 +4,4 @@ openai-whisper>=20231117
|
||||||
aeneas>=1.7.3.0
|
aeneas>=1.7.3.0
|
||||||
whisperx>=3.1.1
|
whisperx>=3.1.1
|
||||||
huggingface_hub==0.22.2
|
huggingface_hub==0.22.2
|
||||||
|
num2words==0.5.13
|
||||||
|
|
Loading…
Reference in New Issue