"""Gradio app: transcribe an uploaded audio file with faster-whisper,
emitting one timestamped line per recognized segment."""

import logging
from functools import lru_cache

import gradio as gr
import torch
from faster_whisper import WhisperModel

# Configure logging for debugging purposes
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)


def format_timestamp(seconds: float) -> str:
    """Convert seconds to HH:MM:SS.mmm format."""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    seconds_remainder = seconds % 60
    # 06.3f -> zero-padded seconds with millisecond precision, e.g. 07.250
    return f"{hours:02d}:{minutes:02d}:{seconds_remainder:06.3f}"


@lru_cache(maxsize=2)
def _load_model(model_size: str) -> WhisperModel:
    """Load and cache a WhisperModel for the given size.

    Caching avoids re-downloading/re-loading the weights on every request
    (the original code rebuilt the model inside each transcription call).
    float16 on GPU, int8 on CPU — the usual faster-whisper defaults.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    compute_type = "float16" if device == "cuda" else "int8"
    return WhisperModel(model_size, device=device, compute_type=compute_type)


def transcribe(audio_file: str, model_size: str) -> str:
    """Transcribe *audio_file* with the selected model size.

    Args:
        audio_file: Path to the uploaded audio file (Gradio ``type="filepath"``).
        model_size: One of the dropdown choices, e.g. ``"base"`` or ``"large-v3"``.

    Returns:
        The transcription, one ``[HH:MM:SS.mmm -> HH:MM:SS.mmm] text`` line
        per segment.
    """
    model = _load_model(model_size)
    # transcribe() returns (segments_generator, info); info is unused here
    segments, _ = model.transcribe(audio_file)
    lines = [
        f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
        for segment in segments
    ]
    return "\n".join(lines)


# Define the Gradio interface with a dropdown for model selection
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        # Gradio 4 documents `sources` as a list of input methods
        gr.Audio(sources=["upload"], type="filepath", label="Upload Audio"),
        gr.Dropdown(
            choices=["base", "small", "medium", "large", "large-v2", "large-v3"],
            label="Model Size",
        ),
    ],
    outputs="text",
    title="Whisper API",
    description="For web use please visit [this space](https://huggingface.co/spaces/Lenylvt/Whisper)",
)

# Launch the app
if __name__ == "__main__":
    iface.launch()