from transformers import pipeline

# Speech-recognition pipeline, built once when the module is imported and
# shared by every call to the transcription handler below.
# NOTE(review): `decoder` is passed as a plain string here — confirm that the
# pipeline accepts this form rather than a decoder object.
p = pipeline(
    "automatic-speech-recognition",
    model="panjiariputra/indonesian-xlsr_53-LARGE-4gram",
    decoder="Wav2Vec2ProcessorWithLM",
)

import gradio as gr
import time

def transcribe_audio(mic=None, file=None):
    """Transcribe speech from exactly one of the two audio inputs.

    Args:
        mic: Audio supplied through the microphone input, or None if unused.
        file: Audio supplied through the upload input, or None if unused.

    Returns:
        The "text" entry of the pipeline result for the supplied audio, or
        an error message string when neither or both inputs are provided.
    """
    has_mic = mic is not None
    has_file = file is not None

    # Exactly one source must be present: "both" and "neither" are rejected.
    if has_mic == has_file:
        return "ERROR: You must and may only select one method, it cannot be empty or select both methods at once."

    selected = mic if has_mic else file
    result = p(selected)
    return result["text"]

# Two alternative audio sources; both are optional so the handler can check
# that exactly one of them was actually provided.
mic_input = gr.Audio(
    source="microphone",
    type="filepath",
    optional=True,
    label="Speak here...",
)
upload_input = gr.Audio(
    source="upload",
    type="filepath",
    optional=True,
    label="Upload your audio here...",
)

# HTML shown under the title: a usage paragraph followed by contact details.
description_html = (
    "<p style='text-align: justify'>"
    "&emsp; This web application is an Indonesian speech recognition built using the XLSR-53 pre-trained model. You can use this web application in two ways: (1) Using microphone as input; and (2) Uploading audio file which will then be processed as input. You cannot use both at the same time nor can it be left blank. This speech recognition can be implemented for certain purposes."
    "</p>"
    "<p style='text-align: justify'>"
    "For offers and information please contact: <br> • panji.arisaputra@binus.ac.id <br> • amalia.zahra@binus.ac.id"
    "</p>"
)

# Build the interface around the transcription handler and start serving it.
demo = gr.Interface(
    fn=transcribe_audio,
    inputs=[mic_input, upload_input],
    outputs="text",
    title='Indonesian Speech Recognition',
    description=description_html,
    live=True,
    allow_flagging="never",
)
demo.launch()