File size: 2,878 Bytes
30ee258
 
 
77e69d0
 
30ee258
 
 
 
 
 
 
 
 
 
 
 
ecb3aa4
 
 
 
 
30ee258
77e69d0
 
30ee258
 
77e69d0
ecb3aa4
 
 
 
77e69d0
 
 
 
 
ecb3aa4
 
 
222040d
30ee258
77e69d0
ecb3aa4
 
 
222040d
 
77e69d0
 
222040d
30ee258
 
 
 
 
ecb3aa4
 
 
 
 
 
30ee258
 
ecb3aa4
d6f026a
f871faa
30ee258
 
 
 
 
ecb3aa4
30ee258
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import streamlit as st
from faster_whisper import WhisperModel
import logging
import tempfile
import os

# Enable verbose logging from faster-whisper so transcription internals
# (model load, decode steps) are visible while debugging.
logging.basicConfig()
_fw_logger = logging.getLogger("faster_whisper")
_fw_logger.setLevel(logging.DEBUG)

def format_timestamp(seconds):
    """Convert a duration in seconds to an HH:MM:SS.mmm string.

    Args:
        seconds: Non-negative duration in seconds (int or float).

    Returns:
        str: timestamp formatted as "HH:MM:SS.mmm".
    """
    # Round to whole milliseconds FIRST, then split into fields.
    # Formatting the raw float remainder could emit "60.000" seconds
    # (e.g. 59.9999 -> "00:00:60.000"); integer divmod on milliseconds
    # carries the overflow into minutes/hours correctly.
    total_ms = round(seconds * 1000)
    hours, rem_ms = divmod(total_ms, 3_600_000)
    minutes, rem_ms = divmod(rem_ms, 60_000)
    return f"{hours:02d}:{minutes:02d}:{rem_ms / 1000:06.3f}"

def transcribe(audio_file, model_size):
    """Transcribe an uploaded audio/video file with faster-whisper.

    Args:
        audio_file: Streamlit UploadedFile holding the media to transcribe.
        model_size: Whisper model size name (e.g. "base", "large-v3").

    Returns:
        str: newline-joined segment lines, each prefixed with a
        "[HH:MM:SS.mmm -> HH:MM:SS.mmm]" timestamp range.
    """
    # Placeholders for dynamic progress updates in the Streamlit UI.
    progress_text = st.empty()
    progress_bar = st.progress(0)

    # CPU + int8 keeps inference memory-friendly; switch device to "cuda"
    # (and e.g. compute_type "float16") for GPU inference.
    device = "cpu"
    compute_type = "int8"

    model = WhisperModel(model_size, device=device, compute_type=compute_type)

    progress_text.text("Preparing file for transcription...")
    progress_bar.progress(10)

    # Persist the in-memory upload to disk: faster-whisper needs a path.
    # Keep the original extension so format detection still works.
    with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(audio_file.name)[1]) as tmp:
        tmp.write(audio_file.getvalue())
        tmp_path = tmp.name

    try:
        progress_text.text("Transcribing audio...")
        progress_bar.progress(30)

        # transcribe() returns a LAZY generator that reads the file while
        # being iterated, so the segments must be fully consumed before the
        # temporary file is deleted (the original removed the file first,
        # which breaks decoding of the not-yet-read segments).
        segments, _ = model.transcribe(tmp_path)

        progress_text.text("Processing transcription...")
        progress_bar.progress(70)

        # Consuming this comprehension drives the generator to completion.
        transcription_with_timestamps = [
            f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
            for segment in segments
        ]
    finally:
        # Always clean up the temp file, even if transcription raised.
        os.remove(tmp_path)

    progress_text.text("Transcription complete.")
    progress_bar.progress(100)
    progress_text.empty()  # Optionally clear the completion message

    return "\n".join(transcription_with_timestamps)

# --- Streamlit page: wire the transcribe() helper to a simple UI ---
# Widget order matters: Streamlit renders top-to-bottom, so title, links,
# then the two inputs, then the (conditional) transcription output.
st.title("Whisper")
st.write("For Remove Timestamps please visit [this Space](https://huggingface.co/spaces/Lenylvt/Whisper_Timestamps_Remover). For API use please visit [this space](https://huggingface.co/spaces/Lenylvt/Whisper-API)")

uploaded_media = st.file_uploader("🎡 Upload Audio or Video", type=["wav", "mp3", "ogg", "mp4", "avi"])
chosen_size = st.selectbox("πŸ“ Model Size", ["base", "small", "medium", "large", "large-v2", "large-v3"])

# Kick off transcription only once both inputs are available.
if not (uploaded_media is None or chosen_size is None):
    st.text_area("πŸ“œ Transcription", transcribe(uploaded_media, chosen_size), height=300)