From 96f6f9fc7a40028753a60e093fc49e4010a0a458 Mon Sep 17 00:00:00 2001
From: pyp_l40
Date: Mon, 22 Apr 2024 11:56:39 -0500
Subject: [PATCH] better handle numbers

---
 gradio_app.py           | 27 +++++++++++++++++++++++++++
 gradio_requirements.txt |  1 +
 2 files changed, 28 insertions(+)

diff --git a/gradio_app.py b/gradio_app.py
index 1e84732..fc62c4a 100644
--- a/gradio_app.py
+++ b/gradio_app.py
@@ -1,4 +1,6 @@
 import os
+import re
+from num2words import num2words
 import gradio as gr
 import torch
 import torchaudio
@@ -201,6 +203,27 @@ def get_output_audio(audio_tensors, codec_audio_sr):
     buffer.seek(0)
     return buffer.read()
 
+def replace_numbers_with_words(sentence):
+    """Spell out every run of digits in `sentence` as words.
+
+    Each run of digits is first padded with a space on either side, so digits
+    glued to letters (e.g. "5km") become standalone tokens; every standalone
+    run is then handed to num2words. The padding can leave doubled spaces
+    behind, which the caller is expected to collapse.
+    """
+    sentence = re.sub(r'(\d+)', r' \1 ', sentence)  # add spaces around numbers
+
+    def replace_with_words(match):
+        num = match.group(0)
+        try:
+            return num2words(num)  # convert the digits to words
+        except Exception:
+            # Best effort: keep the digits when num2words rejects them. Catching
+            # Exception rather than using a bare except lets KeyboardInterrupt
+            # and SystemExit propagate.
+            return num
+
+    return re.sub(r'\b\d+\b', replace_with_words, sentence)  # spell out each standalone digit run
 
 def run(seed, left_margin, right_margin, codec_audio_sr, codec_sr, top_k, top_p, temperature,
         stop_repetition, sample_batch_size, kvcache, silence_tokens,
@@ -213,6 +236,10 @@ def run(seed, left_margin, right_margin, codec_audio_sr, codec_sr, top_k, top_p,
         raise gr.Error("Can't use smart transcript: whisper transcript not found")
 
     seed_everything(seed)
+    # replace numbers with words, so that the phonemizer can do a better job;
+    # then collapse the runs of spaces left by the padding around each number
+    transcript = re.sub(r" {2,}", " ", replace_numbers_with_words(transcript))
+
     if mode == "Long TTS":
         if split_text == "Newline":
             sentences = transcript.split('\n')
diff --git a/gradio_requirements.txt b/gradio_requirements.txt
index e9e7635..5b2958a 100644
--- a/gradio_requirements.txt
+++ b/gradio_requirements.txt
@@ -4,3 +4,4 @@ openai-whisper>=20231117
 aeneas>=1.7.3.0
 whisperx>=3.1.1
 huggingface_hub==0.22.2
+num2words==0.5.13