"""Streamlit app that transcribes microphone speech to text.

Audio is captured with ``speech_recognition`` and transcribed with a
``streaming_stt_nemo`` model; each transcript is written to the page.
"""

import os
import subprocess
import sys
import tempfile

import streamlit as st
import speech_recognition as sr
from streaming_stt_nemo import Model

# Language code used to select the speech-to-text model.
DEFAULT_LANG = "en"


def _ensure_flash_attention():
    """Install ``flash-attn`` once per Streamlit session (best effort).

    The session-state flag prevents the install from being re-run each time
    Streamlit re-executes the script within the same session.
    """
    if "sv_load_flash_attention" in st.session_state:
        return

    # Extend the current environment instead of replacing it: passing only
    # the flag would strip PATH, HOME and virtualenv variables from the child.
    install_env = {**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"}

    # Argument list + the running interpreter: no shell involved, and the
    # package lands in the environment this script is actually using.
    # The exit status is deliberately not checked, as in the original.
    subprocess.run(
        [
            sys.executable,
            "-m",
            "pip",
            "install",
            "flash-attn",
            "--no-build-isolation",
        ],
        env=install_env,
    )
    st.session_state.sv_load_flash_attention = True


def _transcribe(model, audio):
    """Return the transcript of one captured utterance.

    Args:
        model: The ``streaming_stt_nemo`` model to transcribe with.
        audio: An ``sr.AudioData`` capture returned by ``Recognizer.listen``.

    Returns:
        The first element of ``model.stt_file(...)``'s result.
    """
    # NOTE(review): stt_file is assumed to expect a path to an audio file
    # rather than an in-memory AudioData object -- confirm against the
    # streaming_stt_nemo API.
    wav_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    try:
        # Close the handle before transcribing so the file can be reopened
        # by path on every platform.
        with wav_file:
            wav_file.write(audio.get_wav_data())
        return model.stt_file(wav_file.name)[0]
    finally:
        os.remove(wav_file.name)


def main():
    """Listen on the default microphone forever and display each transcript."""
    st.title("Speech to Text Converter")

    recognizer = sr.Recognizer()
    mic = sr.Microphone()

    # Build the model once; constructing it per utterance repeats the
    # expensive load on every loop iteration.
    model = Model(DEFAULT_LANG)

    with mic as source:
        st.write("Listening...")

        # Calibrate once up front: calibration consumes microphone audio, so
        # repeating it inside the loop would discard speech between phrases.
        recognizer.adjust_for_ambient_noise(source)

        while True:
            try:
                audio = recognizer.listen(source)
                text = _transcribe(model, audio)
            except sr.UnknownValueError:
                # Kept from the original: skip this utterance, keep listening.
                continue

            st.write("You said:", text)


_ensure_flash_attention()

if __name__ == "__main__":
    main()