"""Vocalize: a Streamlit app that transcribes English/Hindi speech with NeMo Conformer-CTC models.

The app offers two input paths — live mic recording and file upload — and
runs the language-appropriate Vakyansh acoustic model on the captured audio.
"""
import io
import os
import subprocess
import uuid

import streamlit as st
from st_audiorec import st_audiorec  # pip install streamlit-audiorec
import nemo.collections.asr as nemo_asr
from pydub import AudioSegment

# Vakyansh open-model checkpoints: local directory -> download URL.
_MODELS = {
    "English": ("./en_am_model",
                "https://storage.googleapis.com/vakyansh-open-models/conformer_models/english/2022-09-13_15-50-48/Conformer-CTC-BPE-Large.nemo"),
    "Hindi": ("./hi_am_model",
              "https://storage.googleapis.com/vakyansh-open-models/conformer_models/hindi/filtered_v1_ssl_2022-07-08_19-43-25/Conformer-CTC-BPE-Large.nemo"),
}


def _download_model(name, dest_dir, url):
    """Fetch a .nemo checkpoint with wget unless it is already on disk.

    Raises:
        Exception: if wget exits non-zero (stderr included in the message).
    """
    if os.path.exists(dest_dir):
        return
    print(f"Downloading {name} AM")
    download = subprocess.run(["wget", "-P", dest_dir, url],
                              capture_output=True, text=True)
    if download.returncode != 0:
        raise Exception(f"{name} Model Download Failed: {download.stderr}")
    print(f"Downloaded {name} AM")


@st.cache_resource
def get_model():
    """Download (if needed) and load both acoustic models exactly once per server.

    Returns:
        tuple: (english_model, hindi_model) as EncDecCTCModelBPE instances.
    """
    # exist_ok replaces the old bare try/except-pass around makedirs.
    os.makedirs("audio_cache", exist_ok=True)

    _download_model("Hindi", *_MODELS["Hindi"])
    _download_model("English", *_MODELS["English"])

    try:
        en_asr_model = nemo_asr.models.EncDecCTCModelBPE.restore_from(
            "./en_am_model/Conformer-CTC-BPE-Large.nemo")
        hi_asr_model = nemo_asr.models.EncDecCTCModelBPE.restore_from(
            "./hi_am_model/Conformer-CTC-BPE-Large.nemo")
    except Exception as e:
        # Re-raise instead of exit(1): exit() would kill the whole Streamlit
        # server; raising lets Streamlit surface the error in the browser.
        print("ERROR Loading Model... ", e)
        raise
    return en_asr_model, hi_asr_model


en_asr_model, hi_asr_model = get_model()

st.title("💬 Vocalize: Empower Your Voice ")

language = st.selectbox('Enter Your Preferred Language.', ('English', 'Hindi'))

st.write("Record the audio, and get the transcription in real time!\n\n"
         "Note: Works best for smaller audios")


def _cache_path():
    """Return a fresh unique wav path inside audio_cache/."""
    return os.path.join("audio_cache", f"{uuid.uuid4()}.wav")


def _transcribe(audio_path):
    """Run the model matching the selected language on one wav file."""
    model = hi_asr_model if language == "Hindi" else en_asr_model
    return model.transcribe([audio_path], logprobs=False)[0]


st.header("Transcribe Your Voice Using Mic")
wav_audio_data = st_audiorec()
if wav_audio_data:
    audio_location = _cache_path()
    # Normalize to 16-bit / mono / 16 kHz — the format the Conformer
    # checkpoints were trained on.
    audio = AudioSegment.from_file(io.BytesIO(wav_audio_data))
    audio = audio.set_sample_width(2).set_channels(1).set_frame_rate(16000)
    audio.export(audio_location, format="wav")

    text = _transcribe(audio_location)
    print(text)
    st.write("Transcription:")
    st.write(text)

st.header("Transcribe Files")
uploaded_file = st.file_uploader("Upload Your Recording", disabled=False)
if uploaded_file is not None:
    # Persist the upload so NeMo can read it from a file path.
    audio_location = _cache_path()
    with open(audio_location, "wb") as f:
        f.write(uploaded_file.getvalue())

    # Trigger transcription only on explicit button press.
    if st.button("Click Me For Translation"):
        with st.spinner():
            text = _transcribe(audio_location)
        print(text)
        st.write(text)