spookyspaghetti committed on
Commit
0432ec8
1 Parent(s): d499ebb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Environment bootstrap.
#
# These steps were originally IPython/Colab "!" shell escapes, which are a
# SyntaxError when this file is run by the plain Python interpreter. They are
# expressed here as ordinary, shell-free subprocess calls instead.
import subprocess
import sys
from pathlib import Path


def _run(*command):
    """Run *command* without a shell and return its exit status.

    Failures are reported on stderr but never raised. This mirrors notebook
    "!" escapes, where a failing command does not stop the statements that
    follow it.

    Args:
        command: Program name followed by its arguments, one string each.

    Returns:
        The process exit status, or ``None`` when the program could not be
        started at all (for example, the executable is missing).
    """
    argv = list(command)
    try:
        status = subprocess.run(argv, check=False).returncode
    except OSError as err:
        print(f"[setup] could not start {argv[0]!r}: {err}", file=sys.stderr)
        return None
    if status != 0:
        print(
            f"[setup] {' '.join(argv)} exited with status {status}",
            file=sys.stderr,
        )
    return status


def _pip_install(*args):
    """Run ``pip install`` with *args* using the running interpreter's pip."""
    return _run(sys.executable, "-m", "pip", "install", *args)


_pip_install("-U", "git+https://github.com/PrithivirajDamodaran/Gramformer.git")
_pip_install("gradio", "-q")

## Install dependencies
_pip_install("wget")
# -y: there is no terminal available to answer apt's confirmation prompt.
_run("apt-get", "install", "-y", "sox", "libsndfile1", "ffmpeg")
_pip_install("text-unidecode")
# Passed as a single argument: unquoted in a shell, ">=3.3.2" is parsed as an
# output redirection into a file named "=3.3.2" and the version pin is lost.
_pip_install("matplotlib>=3.3.2")

## Install NeMo
BRANCH = 'r1.13.0'
_pip_install(
    f"git+https://github.com/NVIDIA/NeMo.git@{BRANCH}#egg=nemo_toolkit[all]"
)

## Grab the config we'll use in this example
# exist_ok keeps a restart from failing when the directory is already there.
Path("configs").mkdir(exist_ok=True)
_run(
    "wget",
    "-P",
    "configs/",
    f"https://raw.githubusercontent.com/NVIDIA/NeMo/{BRANCH}/examples/asr/conf/config.yaml",
)

_run(sys.executable, "-m", "spacy", "download", "en_core_web_md")
# NOTE(review): "spacy link" only exists in spaCy 2.x; on 3.x this step fails
# (reported, not raised) -- confirm which spaCy version Gramformer pulls in.
_run(sys.executable, "-m", "spacy", "link", "en_core_web_md", "en")
19
+
20
+ import gradio as gr
21
+ import time
22
+ from nemo.collections.asr.models import ASRModel
23
+ import torch
24
# Use the first CUDA device when one is present, otherwise fall back to the
# CPU, so that ``device`` is always defined.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Load the pretrained 'stt_en_citrinet_1024' speech-to-text checkpoint. This
# happens unconditionally: transcribe() needs ``asr_model`` on CPU-only hosts
# as well.
asr_model = ASRModel.from_pretrained(model_name='stt_en_citrinet_1024')
27
+
28
+ from gramformer import Gramformer
29
+ import torch
30
+
31
def set_seed(seed):
    """Seed PyTorch's random number generators.

    Args:
        seed: Integer seed passed to ``torch.manual_seed`` and, when CUDA is
            available, to ``torch.cuda.manual_seed_all``.
    """
    torch.manual_seed(seed)
    if not torch.cuda.is_available():
        return
    # Cover every visible GPU, not just the current one.
    torch.cuda.manual_seed_all(seed)
35
+
36
# Seed the torch RNGs before the corrector is built (presumably so that its
# generated corrections are repeatable between launches -- TODO confirm).
set_seed(1212)

# models: 1 selects the corrector, 2 the detector. The GPU is not used.
gf = Gramformer(
    models=1,
    use_gpu=False,
)
39
+
40
def transcribe(audio):
    """Transcribe an audio file and propose a grammar-corrected version.

    Speech-to-text is done with the NVIDIA NeMo ``asr_model``; the transcript
    is then passed to the Gramformer corrector ``gf``.

    Args:
        audio: Path to the audio file to transcribe, or ``None``/empty when
            nothing was recorded or uploaded.

    Returns:
        A ``(text, correct)`` tuple: the raw transcript and its corrected
        form. Both are empty strings when no audio was supplied. ``correct``
        equals ``text`` when the transcript is empty or the corrector
        produced no candidate.
    """
    # Nothing was submitted: there is no file to hand to the ASR model.
    if not audio:
        return "", ""

    text = asr_model.transcribe(paths2audio_files=[audio])[0]

    # An empty transcript (e.g. silence) has nothing to correct.
    if not text:
        return text, text

    # The corrector may hand back no candidates (or None); in that case fall
    # back to the uncorrected transcript instead of raising on "[0]".
    candidates = gf.correct(text, max_candidates=1)
    correct = next(iter(candidates or ()), text)
    return text, correct
45
+
46
# Gradio wiring: one audio input (delivered to transcribe() as a file path)
# and two text boxes, one per value that transcribe() returns.
audio_input = gr.components.Audio(type="filepath")
output_boxes = ["textbox", "textbox"]

demo = gr.Interface(fn=transcribe, inputs=[audio_input], outputs=output_boxes)
demo.launch()