|
import nemo.collections.asr as nemo_asr |
|
import gradio as gr |
|
|
|
|
|
|
|
|
|
# Load the pretrained checkpoint once at import time so that every request
# handled by this process reuses the same model instance.
# map_location="cpu" asks NeMo to load the checkpoint onto the CPU.
asr_model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
    "theodotus/stt_ua_fastconformer_hybrid_large_pc",
    map_location="cpu",
)
|
|
|
|
|
|
|
|
|
def process_file(in_filename: str) -> str:
    """Transcribe a single audio file with the module-level ``asr_model``.

    Args:
        in_filename: Path of the audio file to recognize. ``None`` or an
            empty string means that nothing was submitted.

    Returns:
        The recognized text, or the literal string ``"Error: No file"``
        when no file path was supplied.
    """
    # Guard clause: the UI hands over None (or "") when the user pressed the
    # button without providing audio, so never reach the model in that case.
    if not in_filename:
        return "Error: No file"

    # The model receives a one-element batch. The result is indexed [0][0];
    # presumably that is (best_hypotheses, ...)[0] -> first file's text.
    # NOTE(review): confirm the return shape and the `paths2audio_files`
    # keyword against the installed NeMo version.
    results = asr_model.transcribe(paths2audio_files=[in_filename])
    return results[0][0]
|
|
|
|
|
|
|
|
|
# Two-tab Gradio UI: one tab transcribes an uploaded file, the other a
# microphone recording. Both tabs send a file path to `process_file`.
demo = gr.Blocks()

with demo:
    with gr.Tabs():
        with gr.TabItem("Upload from disk"):
            # type="filepath" makes Gradio pass a path string to the handler.
            # The deprecated `optional=False` keyword is intentionally not
            # passed: it has no effect in Gradio 3.x.
            # NOTE(review): newer Gradio releases use `sources=[...]` instead
            # of `source=...`; confirm against the pinned Gradio version.
            uploaded_file = gr.Audio(
                source="upload",
                type="filepath",
                label="Upload from disk",
            )
            upload_button = gr.Button("Submit for recognition")
            uploaded_output = gr.Textbox(
                label="Recognized speech from uploaded file",
            )

        with gr.TabItem("Record from microphone"):
            microphone = gr.Audio(
                source="microphone",
                type="filepath",
                label="Record from microphone",
            )
            record_button = gr.Button("Submit for recognition")
            recorded_output = gr.Textbox(
                label="Recognized speech from recordings",
            )

    # Event wiring stays inside the `with demo:` context so the listeners are
    # registered on this Blocks instance.
    upload_button.click(
        process_file,
        inputs=[uploaded_file],
        outputs=[uploaded_output],
    )
    record_button.click(
        process_file,
        inputs=[microphone],
        outputs=[recorded_output],
    )


# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()