"""Gradio demo that transcribes microphone audio with a NeMo Conformer model."""

import os
import tempfile
import wave

import gradio as gr
import nemo.collections.asr as nemo_asr
import numpy as np

# Load the pretrained model once at start-up so every request reuses it.
conformer_large = nemo_asr.models.ASRModel.from_pretrained(
    "nguyenanh2803/stt_conformer_large_na"
)


def _write_mono_wav(sample_rate, samples):
    """Write *samples* to a temporary mono 16-bit PCM WAV file and return its path.

    Args:
        sample_rate: Sampling rate of ``samples`` in Hz.
        samples: Array of shape ``(n,)`` or ``(n, channels)``; integer PCM or
            floating point (floats are assumed to lie in [-1, 1] and are
            clipped to that range).

    Returns:
        Path of the WAV file. The caller is responsible for deleting it.
    """
    data = np.asarray(samples)
    if np.issubdtype(data.dtype, np.integer):
        # Scale integer PCM to [-1, 1] using the full range of its dtype.
        data = data.astype(np.float32) / np.iinfo(data.dtype).max
    else:
        data = data.astype(np.float32)
    if data.ndim > 1:
        # Downmix (n, channels) to mono by averaging the channels.
        data = data.mean(axis=1)
    pcm = (np.clip(data, -1.0, 1.0) * 32767).astype("<i2")

    fd, path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    with wave.open(path, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # 16-bit samples
        wav_file.setframerate(int(sample_rate))
        wav_file.writeframes(pcm.tobytes())
    return path


def _first_text(results):
    """Return the first transcription in *results* as a plain string."""
    # NOTE(review): some NeMo versions/models appear to return a
    # (best_hypotheses, all_hypotheses) tuple and/or hypothesis objects with a
    # ``.text`` attribute instead of plain strings - confirm against the
    # installed NeMo release.
    if isinstance(results, tuple):
        results = results[0]
    if not results:
        return ""
    first = results[0]
    return first if isinstance(first, str) else getattr(first, "text", str(first))


# Define the transcription function
def transcribe_audio(audio):
    """Transcribe one recording delivered by the Gradio audio component.

    Args:
        audio: ``(sample_rate, audio_data)`` tuple as produced by a Gradio
            audio input with ``type="numpy"``, or ``None`` when the user
            submitted without recording anything.

    Returns:
        The transcribed text, or an empty string when there is no audio.
    """
    if audio is None:
        return ""

    # audio is a tuple of (sample_rate, audio_data)
    sample_rate, audio_data = audio
    if audio_data is None or np.size(audio_data) == 0:
        return ""

    # Hand NeMo a WAV file rather than the raw buffer: the file carries the
    # real sample rate, so the model's own audio loader can decode it
    # (presumably resampling to the model's rate - TODO confirm) instead of
    # receiving unscaled samples at an unknown rate.
    wav_path = _write_mono_wav(sample_rate, audio_data)
    try:
        transcription = conformer_large.transcribe([wav_path])
    finally:
        os.remove(wav_path)
    return _first_text(transcription)


# Create the Gradio interface
# NOTE(review): ``gr.inputs.Audio`` / ``source=`` is the legacy Gradio API;
# newer Gradio releases expose ``gr.Audio`` instead - update this call when
# the Gradio dependency is upgraded. The label also mentions uploading while
# the source is microphone-only - confirm which is intended.
interface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.inputs.Audio(
        source="microphone",
        type="numpy",
        label="Speak or Upload an Audio File",
    ),
    outputs="text",
    title="Conformer Large ASR",
    description="Transcribe audio using NVIDIA NeMo's Conformer Large model.",
)

# Launch the interface only when run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()