import gradio as gr
from transformers import pipeline
import librosa
import numpy as np
import traceback

# Build the speech-recognition pipeline once, at module load, so that the
# transcription function below can reuse the same object on every call.
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
)

# Define the function to transcribe the speech
def transcribe_long_form(file_info):
    """Transcribe audio captured by the Gradio ``Audio`` component.

    Args:
        file_info: ``(sample_rate, samples)`` tuple as delivered by
            ``gr.Audio`` in its default numpy mode, or ``None`` when the
            user submitted without recording or uploading anything.
            ``samples`` is either 1-D (mono) or laid out as
            ``(n_samples, n_channels)``.

    Returns:
        The recognized text, or a message beginning with
        ``"An error occurred: "`` when the input is missing/empty or any
        processing step raises.
    """
    target_rate = 16000  # audio is resampled to 16 kHz before recognition

    if file_info is None:
        return "An error occurred: no audio was provided"

    try:
        # Gradio hands over the sample rate first and the samples second.
        sample_rate, audio = file_info
        audio = np.asarray(audio)
        if audio.size == 0:
            return "An error occurred: the audio clip is empty"

        # Integer PCM is rescaled to float32 in [-1, 1); resampling and the
        # recognizer both operate on floating-point samples.
        if np.issubdtype(audio.dtype, np.integer):
            full_scale = float(np.iinfo(audio.dtype).max) + 1.0
            audio = audio.astype(np.float32) / full_scale
        else:
            audio = audio.astype(np.float32)

        # Channels sit on the last axis, so average across it to get mono.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        if sample_rate != target_rate:
            audio = librosa.resample(
                audio, orig_sr=sample_rate, target_sr=target_rate
            )

        # NOTE(review): no chunking options are passed here; confirm the
        # pipeline handles clips longer than ~30 s the way this app intends.
        result = asr_pipeline(audio)
        return result['text']
    except Exception as e:
        # Top-level UI boundary: log the full traceback to the console and
        # surface only the short message in the Gradio output box.
        print(traceback.format_exc())
        return f"An error occurred: {str(e)}"

# Assemble the web UI: a single audio input wired to the transcription
# function, with the result rendered as plain text.
audio_input = gr.Audio(label="Record or Upload Audio")
iface = gr.Interface(
    fn=transcribe_long_form,
    inputs=audio_input,
    outputs="text",
    title="Transcribe Audio",
)

# Start serving the app.
iface.launch()