akashsivanandan committed on
Commit
8133adf
1 Parent(s): f7770e0

Updated app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -0
app.py CHANGED
@@ -1,3 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
 
3
  gr.Interface(
 
1
+ from deepspeech import Model
2
+ import numpy as np
3
+
4
# File names of the DeepSpeech 0.8.2 model (.pbmm) and its external scorer.
model_file_path = "deepspeech-0.8.2-models.pbmm"
lm_file_path = "deepspeech-0.8.2-models.scorer"

# Decoder settings applied to the model below: the beam width and the
# alpha/beta values passed to the scorer.
beam_width = 100
lm_alpha, lm_beta = 0.93, 1.18

# One module-level model instance, configured once at import time; the
# transcription callback creates its streams from this object.
model = Model(model_file_path)
model.enableExternalScorer(lm_file_path)
model.setScorerAlphaBeta(lm_alpha, lm_beta)
model.setBeamWidth(beam_width)
14
+
15
+
16
def reformat_freq(sr, y):
    """Return ``(sr, y)`` at 16 kHz, downsampling 48 kHz input by three.

    Args:
        sr: Sample rate of ``y`` in Hz; must be 16000 or 48000.
        y: Array of audio samples (assumed mono, 1-D -- TODO confirm
            against the caller).

    Returns:
        A ``(sample_rate, samples)`` tuple. 16 kHz input is returned
        unchanged. 48 kHz input is peak-normalised to the int16 range,
        averaged in groups of three samples, cast to int16 and returned
        with a sample rate of 16000.

    Raises:
        ValueError: If ``sr`` is neither 16000 nor 48000.
    """
    if sr not in (48000, 16000):
        # Deepspeech only supports 16k (we convert 48k -> 16k).
        raise ValueError("Unsupported rate", sr)
    if sr == 16000:
        return sr, y

    # The 3:1 averaging needs whole triples, so drop up to two trailing
    # samples instead of letting reshape() raise on an uneven length.
    usable = len(y) - len(y) % 3
    # Work in float64 so abs() and the scaling cannot overflow int16 input.
    samples = np.asarray(y[:usable], dtype=np.float64)
    if samples.size == 0:
        # Nothing to normalise; np.max() would raise on an empty array.
        return 16000, samples.astype("int16")

    # Normalise by the largest magnitude (not just the positive peak) so the
    # scaled signal always fits in int16; the floor of 1 avoids dividing by
    # zero on silence.
    peak = max(np.max(np.abs(samples)), 1)
    downsampled = ((samples / peak) * 32767).reshape((-1, 3)).mean(axis=1)
    return 16000, downsampled.astype("int16")
31
+
32
+
33
def transcribe(speech, stream):
    """Feed one audio chunk to a DeepSpeech stream and decode what it has so far.

    Args:
        speech: Sequence unpacked as the ``(sr, y)`` arguments of
            ``reformat_freq``.
        stream: Stream returned by a previous call, or ``None`` to create a
            new one from the module-level ``model``.

    Returns:
        A ``(text, stream)`` tuple: the intermediate decoding result and the
        stream to pass into the next call.
    """
    samples = reformat_freq(*speech)[1]
    # Reuse the caller's stream when there is one; otherwise start a new one.
    active_stream = model.createStream() if stream is None else stream
    active_stream.feedAudioContent(samples)
    return active_stream.intermediateDecode(), active_stream
40
+
41
+
42
+
43
+
44
  import gradio as gr
45
 
46
  gr.Interface(