import gradio as gr
from transformers import pipeline
import numpy as np
import librosa
import traceback

# Initialize the ASR pipeline
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
)


def transcribe_long_form(file_info):
    if file_info is None:
        return "No audio file provided."
    try:
        # Gradio's "numpy" audio type yields a (sample_rate, audio_array) tuple
        sample_rate, audio_array = file_info

        # Convert integer PCM samples to float32 in [-1, 1]
        if np.issubdtype(audio_array.dtype, np.integer):
            audio_array = audio_array.astype(np.float32) / np.iinfo(audio_array.dtype).max

        # Collapse multi-channel (stereo) audio to mono; Gradio delivers
        # shape (samples, channels) while librosa expects (channels, samples)
        if audio_array.ndim > 1:
            audio_mono = librosa.to_mono(audio_array.T)
        else:
            audio_mono = audio_array

        # Resample to 16 kHz, the rate the Whisper model expects
        if sample_rate != 16000:
            audio_mono = librosa.resample(audio_mono, orig_sr=sample_rate, target_sr=16000)

        # Transcribe the audio; the pipeline returns a dict with a 'text' field
        result = asr_pipeline({"raw": audio_mono, "sampling_rate": 16000})
        return result["text"]
    except Exception as e:
        traceback.print_exc()
        return f"An error occurred: {e}"


# Define Gradio interfaces for microphone and file upload
mic_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(sources="microphone", type="numpy"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never",
)

file_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(sources="upload", type="numpy"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never",
)

# Set up the main Gradio app with tabbed interfaces for the two input sources
with gr.Blocks() as demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

# Launch the Gradio app
demo.launch(share=True)
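
# Optional sanity check (a minimal sketch, not part of the original script):
# it assumes a local file named "sample.wav" exists and exercises
# transcribe_long_form() without the Gradio UI. If you want to try it,
# run these lines before demo.launch(), since launch() blocks the script.
#
#   audio, sr = librosa.load("sample.wav", sr=None, mono=False)
#   audio = audio.T if audio.ndim > 1 else audio  # match Gradio's (samples, channels) layout
#   print(transcribe_long_form((sr, audio)))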