import streamlit as st
import whisper
import tempfile
from pydub import AudioSegment

# Define available models
available_models = ["tiny", "base", "small", "medium", "large"]

st.title("Voice Recognition App")
st.write("Upload an audio file and choose a Whisper model to transcribe it to text.")

# Model selection dropdown
model_choice = st.selectbox("Choose a Whisper model", available_models)

# Load the selected Whisper model
st.write(f"Loading {model_choice} model...")
model = whisper.load_model(model_choice)
st.write(f"{model_choice} model loaded successfully.")

# File uploader for audio file
uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "m4a"])

if uploaded_file is not None:
    # Save the uploaded file temporarily
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(uploaded_file.read())
        temp_file_path = temp_file.name

    # Convert the audio to WAV, a format Whisper supports (if necessary)
    audio = AudioSegment.from_file(temp_file_path)
    temp_wav_path = tempfile.mktemp(suffix=".wav")
    audio.export(temp_wav_path, format="wav")

    st.audio(uploaded_file, format="audio/wav")

    st.write("Transcribing audio...")

    # Transcribe the audio using the Whisper model
    result = model.transcribe(temp_wav_path)

    st.write("Transcription:")
    st.write(result["text"])
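
# Note (a suggested refinement, not part of the original script): Streamlit
# reruns this file from top to bottom on every widget interaction, so
# whisper.load_model above is called again each time the user touches a widget.
# Caching the load with st.cache_resource (available in Streamlit >= 1.18)
# avoids reloading the model weights; the helper name load_whisper_model is
# illustrative only.
#
#     @st.cache_resource
#     def load_whisper_model(name: str):
#         return whisper.load_model(name)
#
#     model = load_whisper_model(model_choice)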