"""Gradio demo that transcribes Sanskrit speech with a fine-tuned Whisper model."""

import os

import gradio as gr
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read

MODEL_NAME = "Bidwill/whisper-small-sanskrit"
BATCH_SIZE = 8
TARGET_SAMPLING_RATE = 16000  # Whisper uses 16kHz by default

# Initialize the Whisper pipeline.
# chunk_length_s=30 makes the pipeline split long recordings into 30 s
# chunks so inputs longer than Whisper's context window still transcribe.
pipe = pipeline(
    model=MODEL_NAME,
    chunk_length_s=30,
)


def transcribe(inputs):
    """Transcribe the audio file at path *inputs* and return the text.

    Parameters
    ----------
    inputs : str | None
        Filesystem path to an audio file (Gradio `type="filepath"`).

    Returns
    -------
    str
        The transcription produced by the Whisper pipeline.

    Raises
    ------
    gr.Error
        If no audio file was submitted (``inputs`` is None).
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    # Read raw bytes; ffmpeg_read decodes any ffmpeg-supported format and
    # resamples to the 16 kHz rate the model expects.
    with open(inputs, "rb") as audio_file:
        audio_bytes = audio_file.read()
    audio_data = ffmpeg_read(audio_bytes, sampling_rate=TARGET_SAMPLING_RATE)

    # Run the decoded waveform through Whisper.
    text = pipe(audio_data, batch_size=BATCH_SIZE)["text"]
    return text


def create_demo():
    """Build and return the two-tab Gradio Blocks UI (microphone / upload)."""
    with gr.Blocks() as demo:
        # Tab for microphone transcription
        with gr.Tab("Transcribe Microphone"):
            mic_input = gr.Audio(sources="microphone", type="filepath", label="Microphone Audio")
            mic_output = gr.Textbox(label="Transcription")
            mic_button = gr.Button("Transcribe")
            mic_button.click(transcribe, inputs=mic_input, outputs=mic_output)

        # Tab for audio file transcription
        with gr.Tab("Transcribe Audio File"):
            file_input = gr.Audio(sources="upload", type="filepath", label="Upload Audio File")
            file_output = gr.Textbox(label="Transcription")
            file_button = gr.Button("Transcribe")
            file_button.click(transcribe, inputs=file_input, outputs=file_output)

    return demo


# Keep `demo` at module level so hosts that import this module (e.g. HF
# Spaces) can find it, but only launch a server when run as a script.
demo = create_demo()

if __name__ == "__main__":
    demo.launch(share=True)