import gradio as gr
import librosa
import soundfile as sf
import torch
import warnings
import os 
from transformers import Wav2Vec2ProcessorWithLM, Wav2Vec2CTCTokenizer, Wav2Vec2Model


# Silence all Python warnings. This runs before the SpeechBrain import below,
# so the filter is already installed when that import executes.
warnings.filterwarnings("ignore")

from speechbrain.pretrained import EncoderDecoderASR

# Model identifier handed to SpeechBrain, and the local directory passed as
# ``savedir`` for the fetched model files.
_ASR_MODEL_SOURCE = "speechbrain/asr-wav2vec2-commonvoice-rw"
_ASR_MODEL_SAVEDIR = "pretrained_models/asr-wav2vec2-commonvoice-rw"

# Loaded once at import time and shared by every transcription request.
asr_model = EncoderDecoderASR.from_hparams(
    source=_ASR_MODEL_SOURCE,
    savedir=_ASR_MODEL_SAVEDIR,
)
# Manual smoke test (left disabled):
# asr_model.transcribe_file("speechbrain/asr-wav2vec2-commonvoice-rw/example.mp3")


# define speech-to-text function
def asr_transcript(audio):
    """Transcribe an audio clip to text with the module-level ``asr_model``.

    Args:
        audio: The value supplied by the Gradio audio input. When truthy, its
            ``name`` attribute is passed to ``asr_model.transcribe_file`` as
            the path of the file to transcribe. ``None`` means no audio was
            provided.

    Returns:
        str: The transcription returned by the model, or a user-facing
        message when no audio (or a falsy value) was supplied.
    """
    # The interface declares exactly one output component, so every branch
    # must return a single string (the previous 2-tuple did not match it).
    if audio is None:
        return "Please provide audio by uploading a file or by recording audio using microphone by pressing Record (And allow usage of microphone)"

    # Any other falsy value is rejected rather than handed to the model.
    if not audio:
        return "File not valid"

    return asr_model.transcribe_file(audio.name)
    
# Web UI: one microphone audio input wired to ``asr_transcript``, one text
# box showing the recognized speech.
gradio_ui = gr.Interface(
    fn=asr_transcript,
    title="Kinyarwanda Speech Recognition",
    description="Upload an audio clip or record from browser using microphone, and let AI do the hard work of transcribing.",
    # Credit the toolkit that actually provides the model loaded by this
    # script (SpeechBrain); the text previously named a different project.
    article = """
    This demo showcases the pretrained model from SpeechBrain.
    """,
    inputs=[gr.inputs.Audio(source="microphone", type="file", optional=False, label="Record from microphone")],
    outputs=[gr.outputs.Textbox(label="Recognized speech")],
    # NOTE(review): presumably these sample files sit next to this script —
    # confirm they are shipped with the app.
    examples =  [["sample_1.wav"],["sample_2.wav"]]
)

# Start serving the interface with request queueing enabled.
gradio_ui.launch(enable_queue=True)