File size: 2,878 Bytes
30ee258 77e69d0 30ee258 ecb3aa4 30ee258 77e69d0 30ee258 77e69d0 ecb3aa4 77e69d0 ecb3aa4 222040d 30ee258 77e69d0 ecb3aa4 222040d 77e69d0 222040d 30ee258 ecb3aa4 30ee258 ecb3aa4 d6f026a f871faa 30ee258 ecb3aa4 30ee258 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import streamlit as st
from faster_whisper import WhisperModel
import logging
import tempfile
import os
# Configure logging for debugging purposes
logging.basicConfig()
# Surface faster_whisper's internal debug messages (model load, decode steps).
# NOTE(review): DEBUG is very chatty — consider logging.INFO for production.
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
def format_timestamp(seconds):
    """Convert a duration in seconds to an ``HH:MM:SS.mmm`` string.

    Args:
        seconds: Non-negative duration in seconds (int or float).

    Returns:
        Zero-padded timestamp string, e.g. ``01:01:01.500``.

    The value is rounded to whole milliseconds *before* being split into
    fields so the carry propagates correctly. The previous implementation
    formatted the raw remainder with ``%.3f``, so e.g. 59.9999 s rendered
    as the invalid timestamp ``00:00:60.000`` instead of ``00:01:00.000``.
    """
    total_ms = round(seconds * 1000)
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    secs = remainder_ms / 1000  # float so the :06.3f format keeps the .mmm part
    return f"{hours:02d}:{minutes:02d}:{secs:06.3f}"
def transcribe(audio_file, model_size):
    """Transcribe an uploaded audio/video file with faster-whisper.

    Args:
        audio_file: Streamlit ``UploadedFile`` from ``st.file_uploader``;
            its full contents are read into memory via ``getvalue()``.
        model_size: Whisper model identifier string (e.g. "base", "large-v3").

    Returns:
        One line per segment, formatted as
        ``[HH:MM:SS.mmm -> HH:MM:SS.mmm] text``, joined by newlines.
    """
    # Placeholders so the status text and progress bar update in place.
    progress_text = st.empty()
    progress_bar = st.progress(0)

    # Initialize the Whisper model based on the selected model size.
    device = "cpu"  # Use "cpu" for CPU, "cuda" for GPU
    compute_type = "int8"  # int8 gives faster inference on both CPU and GPU
    model = WhisperModel(model_size, device=device, compute_type=compute_type)

    progress_text.text("Preparing file for transcription...")
    progress_bar.progress(10)

    # Persist the in-memory upload to disk: the model needs a file path.
    # Keep the original extension so the decoder can sniff the container.
    with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(audio_file.name)[1]) as tmp:
        tmp.write(audio_file.getvalue())
        tmp_path = tmp.name

    try:
        progress_text.text("Transcribing audio...")
        progress_bar.progress(30)

        # model.transcribe returns a *lazy* generator: decoding happens while
        # the segments are iterated, and the audio file must still exist then.
        segments, _ = model.transcribe(tmp_path)

        progress_text.text("Processing transcription...")
        progress_bar.progress(70)

        # Materialize all segments BEFORE the temp file is removed (the
        # original code deleted the file first, racing the lazy decoder).
        transcription_with_timestamps = [
            f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
            for segment in segments
        ]
    finally:
        # Always clean up the temporary file, even if transcription raises.
        os.remove(tmp_path)

    # Finalize progress and clear the status text.
    progress_text.text("Transcription complete.")
    progress_bar.progress(100)
    progress_text.empty()  # Optionally clear the completion message
    return "\n".join(transcription_with_timestamps)
# --- Streamlit UI: collect an upload + model choice, then show the result ---
st.title("Whisper")
st.write("For Remove Timestamps please visit [this Space](https://huggingface.co/spaces/Lenylvt/Whisper_Timestamps_Remover). For API use please visit [this space](https://huggingface.co/spaces/Lenylvt/Whisper-API)")

uploaded_media = st.file_uploader("π΅ Upload Audio or Video", type=['wav', 'mp3', 'ogg', 'mp4', 'avi'])
selected_model = st.selectbox("π Model Size", ["base", "small", "medium", "large", "large-v2", "large-v3"])

# Run the transcription only once both inputs have been provided.
if uploaded_media is not None and selected_model is not None:
    st.text_area("π Transcription", transcribe(uploaded_media, selected_model), height=300)
|