import logging
import os
import tempfile

import streamlit as st
from faster_whisper import WhisperModel

# Configure logging for debugging purposes
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)


def format_timestamp(seconds):
    """Convert seconds to HH:MM:SS.mmm format.

    The value is rounded to whole milliseconds first and then split into
    fields, so a rounding carry propagates correctly (59.9996 becomes
    "00:01:00.000" rather than "00:00:60.000").

    Args:
        seconds: Offset in seconds (int or float).

    Returns:
        The offset formatted as a zero-padded "HH:MM:SS.mmm" string.
    """
    total_ms = int(round(seconds * 1000))
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    whole_seconds, millis = divmod(remainder_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d}.{millis:03d}"


def transcribe(audio_file, model_size):
    """Transcribe an uploaded file and return the text with timestamps.

    The upload is written to a temporary file, passed to a WhisperModel,
    and each resulting segment is rendered as "[start -> end] text".
    Progress is reported through Streamlit placeholders created here.

    Args:
        audio_file: Uploaded file object exposing ``name`` and
            ``getvalue()``.
        model_size: Model size/name passed to ``WhisperModel``.

    Returns:
        One line per segment, joined with newlines.
    """
    # Initialize placeholders for dynamic updates
    progress_text = st.empty()
    progress_bar = st.progress(0)

    # Initialize the Whisper model based on the selected model size.
    # NOTE: a new model object is constructed on every call.
    device = "cpu"  # Use "cpu" for CPU, "cuda" for GPU
    compute_type = "int8"  # Use "int8" for faster inference on both CPU and GPU
    model = WhisperModel(model_size, device=device, compute_type=compute_type)

    # Update progress and text for file preparation
    progress_text.text("Preparing file for transcription...")
    progress_bar.progress(10)

    # Save the upload to a temporary file, keeping the original extension.
    # delete=False means we own the cleanup, hence the try/finally below.
    suffix = os.path.splitext(audio_file.name)[1]
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    tmp_path = tmp.name
    try:
        # Close the handle before the model opens the path.
        with tmp:
            tmp.write(audio_file.getvalue())

        # Update progress and text for transcription start
        progress_text.text("Transcribing audio...")
        progress_bar.progress(30)

        # Transcribe the audio file
        segments, _ = model.transcribe(tmp_path)

        # Update progress and text after transcription
        progress_text.text("Processing transcription...")
        progress_bar.progress(70)

        # faster-whisper documents `segments` as a lazy generator, so it is
        # consumed here, while the temporary file still exists.
        transcription_with_timestamps = [
            f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
            for segment in segments
        ]
    finally:
        # Clean up the temporary file even if writing or transcription failed
        os.remove(tmp_path)

    # Finalize progress and clear text
    progress_text.text("Transcription complete.")
    progress_bar.progress(100)
    progress_text.empty()  # Optionally clear the completion message

    return "\n".join(transcription_with_timestamps)
# Example Streamlit UI setup to use transcribe function
st.title("Whisper")

# Pointers to the companion Spaces (rendered as Markdown links).
st.write(
    "For Remove Timestamps please visit "
    "[this Space](https://huggingface.co/spaces/Lenylvt/Whisper_Timestamps_Remover). "
    "For API use please visit "
    "[this space](https://huggingface.co/spaces/Lenylvt/Whisper-API)"
)

# Input widgets: the media file to transcribe and the model to use.
audio_file = st.file_uploader(
    "🎵 Upload Audio or Video",
    type=["wav", "mp3", "ogg", "mp4", "avi"],
)
model_size = st.selectbox(
    "📁 Model Size",
    ["base", "small", "medium", "large", "large-v2", "large-v3"],
)

# Run the transcription only once both inputs are available, then show it.
if not (audio_file is None or model_size is None):
    transcription = transcribe(audio_file, model_size)
    st.text_area("📜 Transcription", transcription, height=300)