Compare commits
3 Commits
348ffd59ef
...
64da32134a
Author | SHA1 | Date |
---|---|---|
Forkoz | 64da32134a | |
pyp_l40 | fd20265324 | |
Forkoz | 6dda1a4f32 |
|
@ -74,6 +74,8 @@ class WhisperxModel:
|
|||
|
||||
def transcribe(self, audio_path):
|
||||
segments = self.model.transcribe(audio_path, batch_size=8)["segments"]
|
||||
for segment in segments:
|
||||
segment['text'] = replace_numbers_with_words(segment['text'])
|
||||
return self.align_model.align(segments, audio_path)
|
||||
|
||||
|
||||
|
@ -177,7 +179,7 @@ def align(seed, transcript, audio_path):
|
|||
if align_model is None:
|
||||
raise gr.Error("Align model not loaded")
|
||||
seed_everything(seed)
|
||||
|
||||
transcript = replace_numbers_with_words(transcript).replace("  ", " ").replace("  ", " ")
|
||||
fragments = align_segments(transcript, audio_path)
|
||||
segments = [{
|
||||
"start": float(fragment["begin"]),
|
||||
|
|
|
@ -711,7 +711,7 @@ class VoiceCraft(
|
|||
##################### silence repetition handling #####################
|
||||
# prepare the cache placeholder
|
||||
# n_layers, 2, bsz, num_heads, src_len, head_dim
|
||||
past = torch.ones([self.args.num_decoder_layers, 2, x.shape[0]], device=x.device, dtype=torch.float32) if kvcache else None
|
||||
past = torch.ones([self.args.num_decoder_layers, 2, x.shape[0]], device=x.device, dtype=torch.float16) if kvcache else None
|
||||
# handle multi-span kv-cache
|
||||
new_masked_span = False
|
||||
|
||||
|
@ -1011,7 +1011,7 @@ class VoiceCraft(
|
|||
|
||||
# prepare the cache placeholder
|
||||
# n_layers, 2, bsz, num_heads, src_len, head_dim
|
||||
past = torch.ones([self.args.num_decoder_layers, 2, x.shape[0]], device=x.device, dtype=torch.float32) if kvcache else None
|
||||
past = torch.ones([self.args.num_decoder_layers, 2, x.shape[0]], device=x.device, dtype=torch.float16) if kvcache else None
|
||||
# logging.info(f"number of decoder layers: {self.args.num_decoder_layers}")
|
||||
# logging.info(f"number of decoder layers: {self.args.num_decoder_layers}")
|
||||
# logging.info(f"number of decoder layers: {self.args.num_decoder_layers}")
|
||||
|
@ -1261,7 +1261,7 @@ class VoiceCraft(
|
|||
|
||||
# prepare the cache placeholder
|
||||
# n_layers, 2, bsz, num_heads, src_len, head_dim
|
||||
past = torch.ones([self.args.num_decoder_layers, 2, x.shape[0]], device=x.device, dtype=torch.float32) if kvcache else None
|
||||
past = torch.ones([self.args.num_decoder_layers, 2, x.shape[0]], device=x.device, dtype=torch.float16) if kvcache else None
|
||||
# logging.info(f"number of decoder layers: {self.args.num_decoder_layers}")
|
||||
# logging.info(f"number of decoder layers: {self.args.num_decoder_layers}")
|
||||
# logging.info(f"number of decoder layers: {self.args.num_decoder_layers}")
|
||||
|
|
Loading…
Reference in New Issue