import os
import tempfile
import wave

import gradio as gr
import nemo.collections.asr as nemo_asr
import numpy as np
# Load the pre-trained Kabyle ASR model once at import time so that every
# transcription request reuses the same instance.
asr_model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(
    "nvidia/stt_kab_conformer_transducer_large"
)
# Helpers and entry point that turn a Gradio audio payload into a transcript.
def _to_pcm16_mono(samples):
    """Return *samples* as a 1-D little-endian int16 array.

    Integer input is scaled by the maximum of its dtype (assumes signed PCM
    such as int16 -- TODO confirm if unsigned input is ever expected); any
    other dtype is treated as floating point already in [-1.0, 1.0].
    Two-dimensional input is averaged over axis 1, which assumes a
    (samples, channels) layout.
    """
    data = np.asarray(samples)
    if np.issubdtype(data.dtype, np.integer):
        data = data.astype(np.float32) / float(np.iinfo(data.dtype).max)
    else:
        data = data.astype(np.float32)
    if data.ndim > 1:
        data = data.mean(axis=1)
    return (np.clip(data, -1.0, 1.0) * 32767.0).astype("<i2")


def _first_transcript(result):
    """Reduce the value returned by ``asr_model.transcribe`` to one string.

    The result is unwrapped at most twice (first element of a list/tuple),
    which covers a ``(best, all)`` tuple of lists as well as a flat list.
    The remaining item is either used as-is or, when it exposes a ``text``
    attribute, replaced by that attribute.
    """
    for _ in range(2):
        if isinstance(result, (list, tuple)):
            if not result:
                return ""
            result = result[0]
    text = getattr(result, "text", result)
    return "" if text is None else str(text)


def transcribe(audio):
    """Transcribe one audio clip with the module-level ``asr_model``.

    Args:
        audio: ``None`` (nothing recorded), a path to an audio file, or a
            two-item pair holding a sample rate and the samples. The pair is
            accepted in either order: whichever item is a scalar is taken as
            the sample rate.

    Returns:
        The transcript as a string; an empty string when there is no audio.
    """
    if audio is None:
        return ""

    # A file path can be handed to the model unchanged.
    if isinstance(audio, (str, os.PathLike)):
        return _first_transcript(asr_model.transcribe([os.fspath(audio)]))

    first, second = audio
    if np.isscalar(first):
        sample_rate, samples = first, second
    else:
        samples, sample_rate = first, second

    pcm = _to_pcm16_mono(samples)
    if pcm.size == 0:
        return ""

    # Hand the model a WAV file rather than the raw array: the file header
    # carries the real sample rate, so the loader can resample if it needs to.
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = os.path.join(tmp_dir, "input.wav")
        with wave.open(wav_path, "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)
            wav_file.setframerate(int(sample_rate))
            wav_file.writeframes(pcm.tobytes())
        return _first_transcript(asr_model.transcribe([wav_path]))
# Build the Gradio UI: a single audio input wired to `transcribe`, with the
# returned value shown in a text output.
iface = gr.Interface(fn=transcribe, inputs="audio", outputs="text")

# Start serving the interface.
iface.launch()