# Residue from the hosting page header (not part of the program):
#   theodotus's picture
#   Fixed model.transcribe
#   21d3cbb
import nemo.collections.asr as nemo_asr
import gradio as gr
# Load the pretrained checkpoint once, at import time, so that every call to
# process_file reuses the same model object instead of reloading it.
asr_model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
    "theodotus/stt_ua_fastconformer_hybrid_large_pc",
    map_location="cpu",
)
def process_file(in_filename: str,):
    """Transcribe one audio file with the module-level ``asr_model``.

    Args:
        in_filename: Path of the audio file to transcribe. ``None`` and the
            empty string are both treated as "nothing was provided".

    Returns:
        The string ``"Error: No file"`` when no file was provided; otherwise
        element ``[0][0]`` of whatever ``asr_model.transcribe`` returns for
        this single file.
    """
    has_file = in_filename is not None and in_filename != ""
    if not has_file:
        return "Error: No file"

    results = asr_model.transcribe(paths2audio_files=[in_filename])
    # NOTE(review): presumably results[0] is the list of best hypotheses and
    # [0] picks the one for our single input -- confirm against the NeMo
    # version in use.
    first_group = results[0]
    return first_group[0]
# Two-tab interface: one tab takes an uploaded audio file, the other a
# microphone recording. Each tab has its own submit button and output box,
# and both buttons send their audio to process_file.
with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.TabItem("Upload from disk"):
            # source selects where the audio comes from ("microphone" or
            # "upload"); type="filepath" is used so process_file gets a path.
            uploaded_file = gr.Audio(
                source="upload",
                type="filepath",
                optional=False,
                label="Upload from disk",
            )
            upload_button = gr.Button("Submit for recognition")
            uploaded_output = gr.Textbox(label="Recognized speech from uploaded file")

        with gr.TabItem("Record from microphone"):
            microphone = gr.Audio(
                source="microphone",
                type="filepath",
                optional=False,
                label="Record from microphone",
            )
            record_button = gr.Button("Submit for recognition")
            recorded_output = gr.Textbox(label="Recognized speech from recordings")

    # Wire each button to the shared handler; the result lands in the textbox
    # that belongs to the same tab as the button.
    upload_button.click(process_file, inputs=[uploaded_file], outputs=[uploaded_output])
    record_button.click(process_file, inputs=[microphone], outputs=[recorded_output])
# Launch the Gradio app only when this file is executed as a script, so that
# importing the module does not start it.
if __name__ == "__main__":
    demo.launch()