"""Gradio app: transcribe an uploaded audio file with faster-whisper,
emitting one timestamped line per recognized segment."""

import logging
from functools import lru_cache

import gradio as gr
import torch
from faster_whisper import WhisperModel

# Configure logging for debugging purposes
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)


def format_timestamp(seconds: float) -> str:
    """Convert seconds to HH:MM:SS.mmm format."""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    seconds_remainder = seconds % 60
    # 06.3f -> zero-padded seconds with millisecond precision, e.g. 07.250
    return f"{hours:02d}:{minutes:02d}:{seconds_remainder:06.3f}"


@lru_cache(maxsize=2)
def _load_model(model_size: str) -> WhisperModel:
    """Load and cache a WhisperModel for the given size.

    Caching avoids re-downloading/re-loading the weights on every request
    (the original code rebuilt the model inside each transcription call).
    float16 on GPU, int8 on CPU — the usual faster-whisper defaults.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    compute_type = "float16" if device == "cuda" else "int8"
    return WhisperModel(model_size, device=device, compute_type=compute_type)


def transcribe(audio_file: str, model_size: str) -> str:
    """Transcribe *audio_file* with the selected model size.

    Args:
        audio_file: Path to the uploaded audio file (Gradio ``type="filepath"``).
        model_size: One of the dropdown choices, e.g. ``"base"`` or ``"large-v3"``.

    Returns:
        The transcription, one ``[HH:MM:SS.mmm -> HH:MM:SS.mmm] text`` line
        per segment.
    """
    model = _load_model(model_size)
    # transcribe() returns (segments_generator, info); info is unused here
    segments, _ = model.transcribe(audio_file)
    lines = [
        f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
        for segment in segments
    ]
    return "\n".join(lines)


# Define the Gradio interface with a dropdown for model selection
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        # Gradio 4 documents `sources` as a list of input methods
        gr.Audio(sources=["upload"], type="filepath", label="Upload Audio"),
        gr.Dropdown(
            choices=["base", "small", "medium", "large", "large-v2", "large-v3"],
            label="Model Size",
        ),
    ],
    outputs="text",
    title="Whisper API",
    description="For web use please visit [this space](https://huggingface.co/spaces/Lenylvt/Whisper)",
)

# Launch the app
if __name__ == "__main__":
    iface.launch()