rutsam committed on
Commit
8d72534
1 Parent(s): d857f13

transcribe files as well

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. requirements.txt +1 -0
app.py CHANGED
@@ -50,7 +50,7 @@ gradio_ui = gr.Interface(
50
  article = """
51
  This demo showcases two pretrained STT models the first model from speechbrain(wave2vec+CTC models)(1,2gb) is 30 times larger compared to the coqui STT (deepspeech model)(45mb).
52
  """,
53
- inputs=[gr.inputs.Audio(source="microphone", type="file", optional=False, label="Record from microphone")],
54
  outputs=[gr.outputs.Textbox(label="Recognized speech from speechbrain model"),
55
  gr.outputs.Textbox(label="Recognized speech from coqui STT model"),
56
  gr.outputs.Textbox(label="Recognized speech from NVIDIA Conformer transduver large model")]
 
50
  article = """
51
  This demo showcases two pretrained STT models the first model from speechbrain(wave2vec+CTC models)(1,2gb) is 30 times larger compared to the coqui STT (deepspeech model)(45mb).
52
  """,
53
+ inputs=[gr.inputs.Audio(label="Upload Audio File", type="file", optional=True), gr.inputs.Audio(source="microphone", type="file", optional=False, label="Record from microphone")],
54
  outputs=[gr.outputs.Textbox(label="Recognized speech from speechbrain model"),
55
  gr.outputs.Textbox(label="Recognized speech from coqui STT model"),
56
  gr.outputs.Textbox(label="Recognized speech from NVIDIA Conformer transduver large model")]
requirements.txt CHANGED
@@ -10,4 +10,5 @@ ffmpeg-python
10
  soundfile==0.10.3.post1
11
  wget
12
  aiofiles
 
13
  git+https://github.com/NVIDIA/NeMo.git@r1.11.0#egg=nemo_toolkit[all]
 
10
  soundfile==0.10.3.post1
11
  wget
12
  aiofiles
13
+ pydub
14
  git+https://github.com/NVIDIA/NeMo.git@r1.11.0#egg=nemo_toolkit[all]