from transformers import pipeline
import streamlit as st
from st_audiorec import st_audiorec


@st.cache_resource
def load_model():
    """Load and cache the Distil-Whisper ASR pipeline.

    @st.cache_resource ensures the (large) model is instantiated once per
    Streamlit server process, not on every script rerun.

    Returns:
        transformers.Pipeline: an automatic-speech-recognition pipeline.
    """
    return pipeline(
        "automatic-speech-recognition",
        "distil-whisper/distil-large-v2",
    )


speech_to_text_model = load_model()


def make_text(audio):
    """Transcribe recorded audio bytes to text.

    Args:
        audio: WAV audio bytes produced by st_audiorec().

    Returns:
        str: the transcription extracted from the pipeline result.
    """
    # No `global` needed: the module-level model is only read, never rebound.
    result = speech_to_text_model(audio)
    return result['text']


st.title('speech recognition')

with st.form(key='record audio'):
    wave_audio_data = st_audiorec()
    button = st.form_submit_button(label='Convert to Text')

if button:
    # Fix: st_audiorec() returns None until a recording exists; calling the
    # ASR pipeline with None crashes the app, so guard before transcribing.
    if wave_audio_data is None:
        st.warning('Please record some audio before converting.')
    else:
        st.write(make_text(wave_audio_data))
else:
    # Informational status (not a success state — nothing has happened yet).
    st.info('No Audio data yet')