File size: 846 Bytes
0432ec8
 
12b8cb1
 
 
 
 
179cc8d
 
 
 
 
 
12b8cb1
 
 
179cc8d
 
 
12b8cb1
51fb0b7
 
 
a5eb216
51fb0b7
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import gradio as gr

from nemo.collections.asr.models import ASRModel
import torch
# Choose the inference device once: the first CUDA GPU when one is available,
# otherwise the CPU. Binding it unconditionally keeps `device` defined on
# CPU-only hosts as well.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Load the pretrained 'stt_en_citrinet_1024' checkpoint and place its weights
# on the device selected above.
asr_model = ASRModel.from_pretrained(
    model_name='stt_en_citrinet_1024',
    map_location=device,
)

from happytransformer import HappyTextToText, TTSettings
# Grammar-correction model, loaded once at import time and reused for every
# request handled by `transcribe`.
_GRAMMAR_MODEL_TYPE = "T5"
_GRAMMAR_MODEL_NAME = "vennify/t5-base-grammar-correction"
happy_tt = HappyTextToText(_GRAMMAR_MODEL_TYPE, _GRAMMAR_MODEL_NAME)

# Generation settings shared by all grammar-correction calls.
args = TTSettings(
    min_length=1,
    num_beams=5,
)


def transcribe(audio):
    """Transcribe a recording with NVIDIA NeMo and grammar-correct the result.

    Args:
        audio: Filesystem path of the recorded audio clip, or ``None`` when
            the form was submitted without a recording.

    Returns:
        A ``(text, corrected)`` tuple: the raw transcript and the
        grammar-corrected transcript. Both are empty strings when no audio
        was supplied; ``corrected`` is empty when the transcript is empty.
    """
    # No recording was provided: nothing to transcribe, so do not hand
    # ``None`` to the ASR model.
    if audio is None:
        return "", ""

    # The audio list is passed positionally so the call does not depend on
    # the keyword name of the first parameter of ``transcribe``.
    text = asr_model.transcribe([audio])[0]

    # An empty transcript would send a bare "grammar: " prompt to the
    # correction model; skip the correction step instead.
    if not text:
        return text, ""

    # Add the prefix "grammar: " before each input.
    correct = happy_tt.generate_text("grammar: " + text, args=args)
    return text, correct.text
    
# Web UI: one microphone recording in (handed to `transcribe` as a file
# path), two text boxes out (raw transcript, corrected transcript).
microphone_input = gr.Audio(source="microphone", type="filepath")
output_boxes = ["textbox", "textbox"]

demo = gr.Interface(
    fn=transcribe,
    inputs=[microphone_input],
    outputs=output_boxes,
)
demo.launch()