Spaces:
Runtime error
Runtime error
Ahsen Khaliq
committed on
Commit
•
098d68e
1
Parent(s):
ef4b9d8
Update app.py
Browse files
app.py
CHANGED
@@ -15,34 +15,35 @@ import matplotlib.pyplot as plt
|
|
15 |
|
16 |
import gradio as gr
|
17 |
|
18 |
-
|
19 |
-
# Sampling rate (Hz) that `inference` asserts the loaded audio against.
fs = 16000

# Identifier of the pretrained model handed to the downloader.
tag = 'ftshijt/open_li52_asr_train_asr_raw_bpe7000_valid.acc.ave_10best'

# Fetch/unpack the model files for `tag` and build the decoder once at
# import time; `inference` reuses this module-level instance.
d = ModelDownloader()
speech2text = Speech2Text(
    **d.download_and_unpack(tag),
    # Runtime placement.
    device="cpu",
    batch_size=0,
    # Decoding options.
    beam_size=10,
    ctc_weight=0.3,
    nbest=1,
    minlenratio=0.0,
    maxlenratio=0.0,
)
|
33 |
|
34 |
def text_normalizer(text):
    """Return *text* upper-cased with every ``string.punctuation`` character removed."""
    # Translation table whose only effect is deleting the punctuation characters.
    drop_punctuation = str.maketrans('', '', string.punctuation)
    uppercased = text.upper()
    return uppercased.translate(drop_punctuation)
|
37 |
|
38 |
-
def inference(audio):
    """Transcribe an uploaded audio file and return the formatted hypothesis.

    Only ``audio.name`` is read; it is passed to ``librosa.load`` as the path
    of the file to decode. The module-level ``speech2text`` decoder produces
    the hypotheses and ``text_normalizer`` cleans the best one.
    """
    waveform, sample_rate = librosa.load(audio.name, sr=16000)
    # Sanity check that the loaded audio matches the module-level rate `fs`.
    assert sample_rate == fs, "mismatch in sampling rate"

    hypotheses = speech2text(waveform)
    # The first element of the top hypothesis is the text; the rest is unused.
    transcript, *_ = hypotheses[0]
    normalized = text_normalizer(transcript)
    return f"ASR hypothesis: {normalized}"
|
44 |
|
45 |
-
# Gradio components: an uploaded audio file in, a text box out.
inputs = gr.inputs.Audio(
    type="file",
    label="Input Audio",
)
outputs = gr.outputs.Textbox(
    label="Output Text",
)

# Demo title string.
title = "ESPnet2-ASR"
|
|
|
15 |
|
16 |
import gradio as gr
|
17 |
|
18 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
def text_normalizer(text):
    """Upper-case *text* and strip all characters found in ``string.punctuation``."""
    shouted = text.upper()
    # An empty-to-empty mapping with a delete set removes punctuation in one pass.
    punctuation_remover = str.maketrans('', '', string.punctuation)
    return shouted.translate(punctuation_remover)
|
23 |
|
24 |
+
# Decoders already built in this process, keyed by model tag. Building a
# Speech2Text instance is the expensive part of a request, so each tag is
# set up once and reused by later calls.
_SPEECH2TEXT_BY_TAG = {}


def _load_speech2text(tag):
    """Return the Speech2Text decoder for *tag*, building it on first use."""
    if tag not in _SPEECH2TEXT_BY_TAG:
        d = ModelDownloader()
        _SPEECH2TEXT_BY_TAG[tag] = Speech2Text(
            **d.download_and_unpack(tag),
            device="cpu",
            minlenratio=0.0,
            maxlenratio=0.0,
            ctc_weight=0.3,
            beam_size=10,
            batch_size=0,
            nbest=1,
        )
    return _SPEECH2TEXT_BY_TAG[tag]


def inference(audio, model):
    """Transcribe an uploaded audio file with the selected pretrained model.

    Args:
        audio: Object whose ``name`` attribute is the path of the audio file
            to decode (only ``audio.name`` is read).
        model: Model tag passed to ``ModelDownloader.download_and_unpack``.

    Returns:
        The string ``"ASR hypothesis: "`` followed by the normalized text of
        the best hypothesis.

    Raises:
        AssertionError: If the rate reported by ``librosa.load`` differs from
            the requested 16 kHz.
    """
    fs = 16000
    speech2text = _load_speech2text(model)

    # Load at the same rate the check below expects (single source of truth).
    speech, rate = librosa.load(audio.name, sr=fs)
    assert rate == fs, "mismatch in sampling rate"

    nbests = speech2text(speech)
    # The first element of the top hypothesis is the text; the rest is unused.
    text, *_ = nbests[0]
    return f"ASR hypothesis: {text_normalizer(text)}"
|
45 |
|
46 |
+
# Gradio components: an uploaded audio file plus a dropdown selecting the
# pretrained model tag. Both components go through the `gr` alias, because
# the module is imported as `import gradio as gr` and the bare name
# `gradio` is not bound.
inputs = [
    gr.inputs.Audio(label="Input Audio", type="file"),
    gr.inputs.Dropdown(
        choices=[
            "ftshijt/open_li52_asr_train_asr_raw_bpe7000_valid.acc.ave_10best",
            "Shinji Watanabe/spgispeech_asr_train_asr_conformer6_n_fft512_hop_length256_raw_en_unnorm_bpe5000_valid.acc.ave",
        ],
        type="value",
        default="ftshijt/open_li52_asr_train_asr_raw_bpe7000_valid.acc.ave_10best",
        label="model",
    ),
]
outputs = gr.outputs.Textbox(label="Output Text")

# Demo title string.
title = "ESPnet2-ASR"
|