# Whisper — app.py (Hugging Face Space by Lenylvt, commit f871faa)
import streamlit as st
from faster_whisper import WhisperModel
import logging
import tempfile
import os
# Configure logging for debugging purposes.
# basicConfig() with no args attaches a default StreamHandler to the root
# logger at WARNING level; the faster_whisper logger is then raised to DEBUG
# so its internal progress/decoding messages are emitted.
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
def format_timestamp(seconds):
    """Render a duration in seconds as an ``HH:MM:SS.mmm`` timestamp string.

    The fractional part of *seconds* is kept (3 decimal places); hours and
    minutes are zero-padded to two digits.
    """
    total_minutes, secs = divmod(seconds, 60)
    hrs, mins = divmod(int(total_minutes), 60)
    return f"{hrs:02d}:{mins:02d}:{secs:06.3f}"
def transcribe(audio_file, model_size):
    """Transcribe an uploaded audio/video file with faster-whisper.

    Parameters
    ----------
    audio_file : streamlit UploadedFile
        The file returned by ``st.file_uploader`` (must expose ``.name``
        and ``.getvalue()``).
    model_size : str
        A faster-whisper model identifier (e.g. "base", "large-v3").

    Returns
    -------
    str
        One line per segment: ``[HH:MM:SS.mmm -> HH:MM:SS.mmm] text``.
    """
    # Placeholders for dynamic progress updates in the Streamlit UI.
    progress_text = st.empty()
    progress_bar = st.progress(0)
    # Initialize the Whisper model based on the selected model size.
    device = "cpu"  # Use "cpu" for CPU, "cuda" for GPU
    compute_type = "int8"  # int8 gives faster inference on both CPU and GPU
    model = WhisperModel(model_size, device=device, compute_type=compute_type)
    progress_text.text("Preparing file for transcription...")
    progress_bar.progress(10)
    # Persist the upload to disk: faster-whisper needs a file path, and the
    # original extension is kept so the decoder can sniff the container format.
    with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(audio_file.name)[1]) as tmp:
        tmp.write(audio_file.getvalue())
        tmp_path = tmp.name
    try:
        progress_text.text("Transcribing audio...")
        progress_bar.progress(30)
        # model.transcribe returns a LAZY generator: the audio is decoded
        # while the segments are iterated, so the generator must be fully
        # consumed BEFORE the temporary file is removed.  (The original code
        # deleted the file first, pulling it out from under the decoder.)
        segments, _ = model.transcribe(tmp_path)
        transcription_with_timestamps = [
            f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
            for segment in segments
        ]
        progress_text.text("Processing transcription...")
        progress_bar.progress(70)
    finally:
        # Always clean up the temp file, even if transcription raises.
        os.remove(tmp_path)
    # Finalize progress and clear the status text.
    progress_text.text("Transcription complete.")
    progress_bar.progress(100)
    progress_text.empty()
    return "\n".join(transcription_with_timestamps)
# Streamlit page wiring: title, links, upload widget, model picker, and the
# transcription output area once both inputs are provided.
st.title("Whisper")
st.write("For Remove Timestamps please visit [this Space](https://huggingface.co/spaces/Lenylvt/Whisper_Timestamps_Remover). For API use please visit [this space](https://huggingface.co/spaces/Lenylvt/Whisper-API)")
uploaded_media = st.file_uploader("🎡 Upload Audio or Video", type=['wav', 'mp3', 'ogg', 'mp4', 'avi'])
chosen_size = st.selectbox("πŸ“ Model Size", ["base", "small", "medium", "large", "large-v2", "large-v3"])
# Run only when both a file and a model size have been selected.
if not (uploaded_media is None or chosen_size is None):
    st.text_area("πŸ“œ Transcription", transcribe(uploaded_media, chosen_size), height=300)