File size: 1,696 Bytes
fc56bc8
7f3ad0d
3404b92
 
 
011f92f
fc56bc8
3404b92
 
 
fc56bc8
3404b92
 
fc56bc8
3404b92
 
 
fc56bc8
3404b92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc56bc8
 
7f3ad0d
3404b92
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import os
import tempfile
import time

import streamlit as st
from faster_whisper import WhisperModel

# Load the model
model_size = "metame/faster-distil-whisper-large-v2"


@st.cache_resource
def _load_model(name):
    """Build the Whisper model once and reuse it across Streamlit reruns.

    Streamlit re-executes this script from top to bottom on every widget
    interaction; without caching, the model would be constructed again on
    each button click or file upload.

    Args:
        name: Model identifier or path passed straight to ``WhisperModel``.

    Returns:
        A ``WhisperModel`` configured for CPU inference with int8 compute.
    """
    return WhisperModel(name, device="cpu", compute_type="int8")


model = _load_model(model_size)

# Streamlit UI: upload an audio file, transcribe it on demand, and show the
# timestamped transcript together with the detected language and elapsed time.
st.title("Audio to Text")
audio_file = st.file_uploader("Upload an audio file", type=["mp3", "wav", "ogg", "flac"])

if st.button("Transcribe"):
    if audio_file is not None:
        # Persist the upload to a uniquely named temporary file. A fixed name
        # in the working directory would be shared (and overwritten) by every
        # concurrent session and would never be cleaned up.
        suffix = os.path.splitext(audio_file.name)[1]
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp_file:
            tmp_file.write(audio_file.getbuffer())
            audio_path = tmp_file.name

        try:
            # Start transcription process
            start_time = time.time()
            segments, info = model.transcribe(audio_path, beam_size=5)

            # Per the faster-whisper README, `segments` is a lazy generator:
            # decoding runs while it is consumed, so the timer has to span
            # this comprehension as well as the transcribe() call.
            lines = [
                f"**[{segment.start:.2f}s - {segment.end:.2f}s]** {segment.text}"
                for segment in segments
            ]
            # Two trailing spaces + newline is a Markdown hard line break; a
            # bare "\n" would be rendered as a space and merge all segments
            # into a single paragraph.
            transcription = "  \n".join(lines)

            processing_time = time.time() - start_time

            # Display results
            if transcription:
                st.write("Detected language: '{}' with probability {:.2f}".format(info.language, info.language_probability))
                st.subheader("Transcription")
                st.write(transcription)
                st.subheader("Processing Time")
                st.markdown(f"**{processing_time:.2f} seconds**")
            else:
                st.error("No transcription was produced. Please check the audio file and try again.")

        except Exception as e:
            # Top-level UI boundary: surface any decoding/model failure to the
            # user instead of crashing the script run.
            st.error(f"An error occurred during transcription: {e}")
        finally:
            # Best-effort cleanup; a failure to delete the temp file must not
            # mask the transcription result or error already shown.
            try:
                os.remove(audio_path)
            except OSError:
                pass
    else:
        st.error("Please upload an audio file.")