"""Streamlit app: upload an audio file and transcribe it with faster-whisper.

Flow: the user uploads an audio file, presses "Transcribe", and the app
writes the upload to a temporary file, runs the (cached) Whisper model on
it, and renders the timestamped transcription plus wall-clock timing.
"""

import tempfile
import time
from pathlib import Path

import streamlit as st
from faster_whisper import WhisperModel

# Hugging Face model id used for transcription (CPU, int8 quantization).
MODEL_SIZE = "metame/faster-distil-whisper-large-v2"


@st.cache_resource
def load_model() -> WhisperModel:
    """Load the Whisper model once per process.

    Streamlit re-executes the whole script on every UI interaction;
    without `st.cache_resource` the multi-GB model would be reloaded
    on every button click.
    """
    return WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")


# --- UI -----------------------------------------------------------------
st.title("Audio to Text")

audio_file = st.file_uploader(
    "Upload an audio file", type=["mp3", "wav", "ogg", "flac"]
)

if st.button("Transcribe"):
    if audio_file is None:
        # Guard clause: nothing to do without an upload.
        st.error("Please upload an audio file.")
    else:
        # Persist the upload to a *unique* temp file so concurrent sessions
        # don't clobber each other (a fixed "temp_audio.<ext>" path would).
        # Keep the original extension so the decoder can sniff the format.
        suffix = "." + audio_file.name.rsplit(".", 1)[-1]
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            tmp.write(audio_file.getbuffer())
            audio_path = tmp.name

        try:
            start_time = time.time()
            segments, info = load_model().transcribe(audio_path, beam_size=5)

            # `segments` is a lazy generator; joining here forces the full
            # decode so `processing_time` measures the real work.
            transcription = "".join(
                f"**[{seg.start:.2f}s - {seg.end:.2f}s]** {seg.text}\n"
                for seg in segments
            )
            processing_time = time.time() - start_time

            if transcription:
                st.write(
                    "Detected language: '{}' with probability {:.2f}".format(
                        info.language, info.language_probability
                    )
                )
                st.subheader("Transcription")
                st.write(transcription)
                st.subheader("Processing Time")
                st.markdown(f"**{processing_time:.2f} seconds**")
            else:
                st.error(
                    "No transcription was produced. Please check the audio file and try again."
                )
        except Exception as e:
            # Broad catch is deliberate at this UI boundary: surface the
            # error to the user instead of crashing the Streamlit session.
            st.error(f"An error occurred during transcription: {e}")
        finally:
            # Always remove the temp file — the original leaked one per run.
            Path(audio_path).unlink(missing_ok=True)