Ahsen Khaliq committed on
Commit
098d68e
1 Parent(s): ef4b9d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -17
app.py CHANGED
@@ -15,34 +15,35 @@ import matplotlib.pyplot as plt
15
 
16
  import gradio as gr
17
 
18
- lang = 'multilingual'
19
- fs = 16000
20
- tag = 'ftshijt/open_li52_asr_train_asr_raw_bpe7000_valid.acc.ave_10best'
21
-
22
- d = ModelDownloader()
23
- speech2text = Speech2Text(
24
- **d.download_and_unpack(tag),
25
- device="cpu",
26
- minlenratio=0.0,
27
- maxlenratio=0.0,
28
- ctc_weight=0.3,
29
- beam_size=10,
30
- batch_size=0,
31
- nbest=1
32
- )
33
 
34
  def text_normalizer(text):
35
  text = text.upper()
36
  return text.translate(str.maketrans('', '', string.punctuation))
37
 
38
- def inference(audio):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  speech, rate = librosa.load(audio.name, sr=16000)
40
  assert rate == fs, "mismatch in sampling rate"
41
  nbests = speech2text(speech)
42
  text, *_ = nbests[0]
43
  return f"ASR hypothesis: {text_normalizer(text)}"
44
 
45
- inputs = gr.inputs.Audio(label="Input Audio", type="file")
46
  outputs = gr.outputs.Textbox(label="Output Text")
47
 
48
  title = "ESPnet2-ASR"
15
 
16
  import gradio as gr
17
 
18
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  def text_normalizer(text):
21
  text = text.upper()
22
  return text.translate(str.maketrans('', '', string.punctuation))
23
 
24
+ def inference(audio, model):
25
+ lang = 'multilingual'
26
+ fs = 16000
27
+ tag = model
28
+
29
+ d = ModelDownloader()
30
+ speech2text = Speech2Text(
31
+ **d.download_and_unpack(tag),
32
+ device="cpu",
33
+ minlenratio=0.0,
34
+ maxlenratio=0.0,
35
+ ctc_weight=0.3,
36
+ beam_size=10,
37
+ batch_size=0,
38
+ nbest=1
39
+ )
40
  speech, rate = librosa.load(audio.name, sr=16000)
41
  assert rate == fs, "mismatch in sampling rate"
42
  nbests = speech2text(speech)
43
  text, *_ = nbests[0]
44
  return f"ASR hypothesis: {text_normalizer(text)}"
45
 
46
+ inputs = [gr.inputs.Audio(label="Input Audio", type="file"),gradio.inputs.Dropdown(choices=["ftshijt/open_li52_asr_train_asr_raw_bpe7000_valid.acc.ave_10best","Shinji Watanabe/spgispeech_asr_train_asr_conformer6_n_fft512_hop_length256_raw_en_unnorm_bpe5000_valid.acc.ave"], type="value", default="ftshijt/open_li52_asr_train_asr_raw_bpe7000_valid.acc.ave_10best", label="model")]
47
  outputs = gr.outputs.Textbox(label="Output Text")
48
 
49
  title = "ESPnet2-ASR"