"""Streamlit demo: transcribe recorded or uploaded English audio to text.

The page shows two forms side by side. The left one records audio in the
browser through ``st_audiorec``; the right one accepts an uploaded audio
file. Submitting either form runs the audio through a cached
``automatic-speech-recognition`` pipeline and writes the transcript.
"""

import logging
from tempfile import NamedTemporaryFile

from transformers import pipeline
import streamlit as st
from st_audiorec import st_audiorec

logger = logging.getLogger(__name__)


@st.cache_resource
def load_model():
    """Build the speech-recognition pipeline.

    Decorated with ``st.cache_resource`` so the pipeline object is created
    once and reused on later script reruns instead of being rebuilt.
    """
    return pipeline(
        "automatic-speech-recognition", "distil-whisper/distil-large-v2"
    )


speech_to_text_model = load_model()


def make_text(audio) -> str:
    """Return the transcript the speech-recognition pipeline produces.

    Args:
        audio: Whatever the pipeline accepts as input. This script passes
            either the value returned by ``st_audiorec()`` or the path of
            an audio file on disk.

    Returns:
        The value stored under the ``'text'`` key of the pipeline output.
    """
    # Only reads the module-level model, so no ``global`` statement needed.
    result = speech_to_text_model(audio)
    return result['text']


def _transcribe_upload(upload) -> str:
    """Spill an uploaded file to a temporary file and transcribe it by path."""
    # NOTE(review): reopening a NamedTemporaryFile by name while it is still
    # open is not supported on Windows -- confirm the deployment target.
    with NamedTemporaryFile() as temp:
        temp.write(upload.getvalue())
        # Push buffered bytes to disk before the file is reopened by name.
        temp.flush()
        return make_text(temp.name)


def _handle_submission(audio, transcribe, failure_message: str) -> None:
    """Transcribe submitted audio and report the outcome on the page.

    Args:
        audio: The submitted audio object, or ``None`` when the user
            pressed the button without recording/uploading anything.
        transcribe: Callable that turns ``audio`` into the transcript.
        failure_message: Text shown to the user when conversion fails.
    """
    if audio is None:
        # Nothing was recorded/uploaded; do not claim audio was submitted.
        st.warning('No Audio data yet')
        return

    st.success('audio submitted, processing is slow give us some few seconds')
    try:
        text = transcribe(audio)
    except Exception:
        # Keep the friendly message for the user, but leave a traceback in
        # the server log instead of discarding the error.
        logger.exception("Speech-to-text conversion failed")
        st.write(failure_message)
    else:
        st.write(text)


st.title('speech recognition, is it worth it!')
st.write('Click start recording to record an audio in English, afterwards click stop, click on convert text once and wait')

col1, col2 = st.columns(2)

with col1:
    with st.form(key='record audio'):
        # Record audio in the browser.
        wave_audio_data = st_audiorec()
        record_submitted = st.form_submit_button(
            label='Convert to Text,click once and wait'
        )
        if record_submitted:
            _handle_submission(
                wave_audio_data,
                make_text,
                "we can't process your request at this time",
            )
        else:
            st.success('No Audio data yet')

with col2:
    with st.form(key='file upload '):
        # Alternative input: upload an existing audio file.
        st.write('Or you can upload an audio file')
        upload = st.file_uploader(label='Upload audio file')
        upload_submitted = st.form_submit_button(
            label='Convert to Tex. Click once and wait'
        )
        if upload_submitted:
            _handle_submission(
                upload,
                _transcribe_upload,
                "we can't process your request at the moment",
            )
        else:
            st.success('No Audio data yet')