File size: 2,878 Bytes
30ee258
 
 
77e69d0
 
30ee258
 
 
 
 
 
 
 
 
 
 
 
ecb3aa4
 
 
 
 
30ee258
77e69d0
 
30ee258
 
77e69d0
ecb3aa4
 
 
 
77e69d0
 
 
 
 
ecb3aa4
 
 
222040d
30ee258
77e69d0
ecb3aa4
 
 
222040d
 
77e69d0
 
222040d
30ee258
 
 
 
 
ecb3aa4
 
 
 
 
 
30ee258
 
ecb3aa4
d6f026a
f871faa
30ee258
 
 
 
 
ecb3aa4
30ee258
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import streamlit as st
from faster_whisper import WhisperModel
import logging
import tempfile
import os

# Enable verbose logging from faster-whisper so transcription internals
# (model load, decode steps) are visible while debugging.
logging.basicConfig()
_fw_logger = logging.getLogger("faster_whisper")
_fw_logger.setLevel(logging.DEBUG)

def format_timestamp(seconds):
    """Convert a duration in seconds to an HH:MM:SS.mmm string.

    Args:
        seconds: Non-negative duration in seconds (int or float).

    Returns:
        str: timestamp formatted as "HH:MM:SS.mmm".
    """
    # Round to whole milliseconds FIRST, then split into fields.
    # Formatting the raw float remainder could emit "60.000" seconds
    # (e.g. 59.9999 -> "00:00:60.000"); integer divmod on milliseconds
    # carries the overflow into minutes/hours correctly.
    total_ms = round(seconds * 1000)
    hours, rem_ms = divmod(total_ms, 3_600_000)
    minutes, rem_ms = divmod(rem_ms, 60_000)
    return f"{hours:02d}:{minutes:02d}:{rem_ms / 1000:06.3f}"

def transcribe(audio_file, model_size):
    """Transcribe an uploaded audio/video file with faster-whisper.

    Args:
        audio_file: Streamlit UploadedFile holding the media to transcribe.
        model_size: Whisper model size name (e.g. "base", "large-v3").

    Returns:
        str: newline-joined segment lines, each prefixed with a
        "[HH:MM:SS.mmm -> HH:MM:SS.mmm]" timestamp range.
    """
    # Placeholders for dynamic progress updates in the Streamlit UI.
    progress_text = st.empty()
    progress_bar = st.progress(0)

    # CPU + int8 keeps inference memory-friendly; switch device to "cuda"
    # (and e.g. compute_type "float16") for GPU inference.
    device = "cpu"
    compute_type = "int8"

    model = WhisperModel(model_size, device=device, compute_type=compute_type)

    progress_text.text("Preparing file for transcription...")
    progress_bar.progress(10)

    # Persist the in-memory upload to disk: faster-whisper needs a path.
    # Keep the original extension so format detection still works.
    with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(audio_file.name)[1]) as tmp:
        tmp.write(audio_file.getvalue())
        tmp_path = tmp.name

    try:
        progress_text.text("Transcribing audio...")
        progress_bar.progress(30)

        # transcribe() returns a LAZY generator that reads the file while
        # being iterated, so the segments must be fully consumed before the
        # temporary file is deleted (the original removed the file first,
        # which breaks decoding of the not-yet-read segments).
        segments, _ = model.transcribe(tmp_path)

        progress_text.text("Processing transcription...")
        progress_bar.progress(70)

        # Consuming this comprehension drives the generator to completion.
        transcription_with_timestamps = [
            f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
            for segment in segments
        ]
    finally:
        # Always clean up the temp file, even if transcription raised.
        os.remove(tmp_path)

    progress_text.text("Transcription complete.")
    progress_bar.progress(100)
    progress_text.empty()  # Optionally clear the completion message

    return "\n".join(transcription_with_timestamps)

# --- Streamlit page: wire the transcribe() helper to a simple UI ---
# Widget order matters: Streamlit renders top-to-bottom, so title, links,
# then the two inputs, then the (conditional) transcription output.
st.title("Whisper")
st.write("For Remove Timestamps please visit [this Space](https://huggingface.co/spaces/Lenylvt/Whisper_Timestamps_Remover). For API use please visit [this space](https://huggingface.co/spaces/Lenylvt/Whisper-API)")

uploaded_media = st.file_uploader("🎡 Upload Audio or Video", type=["wav", "mp3", "ogg", "mp4", "avi"])
chosen_size = st.selectbox("πŸ“ Model Size", ["base", "small", "medium", "large", "large-v2", "large-v3"])

# Kick off transcription only once both inputs are available.
if not (uploaded_media is None or chosen_size is None):
    st.text_area("πŸ“œ Transcription", transcribe(uploaded_media, chosen_size), height=300)