# Nemov2 / app.py
# Author: AkshatJain1402
# Commit cf597fa ("added spinner"), verified
# (Hugging Face page chrome removed: raw / history / blame / "No virus" / 3.63 kB)
import streamlit as st
from st_audiorec import st_audiorec #pip install streamlit-audiorec
import nemo.collections.asr as nemo_asr
from pydub import AudioSegment
import subprocess
import io
import os
import uuid
@st.cache_resource
def get_model():
    """Download (if needed) and load the English and Hindi Conformer-CTC ASR models.

    Cached by Streamlit so the download/restore work happens once per process.

    Returns:
        tuple: (en_asr_model, hi_asr_model) — NeMo ``EncDecCTCModelBPE`` instances.

    Raises:
        RuntimeError: if a model download or a model restore fails.
    """
    # Scratch directory for temporary WAV files written by the UI handlers below.
    os.makedirs("audio_cache", exist_ok=True)

    # (label, target dir, checkpoint URL) for each acoustic model.
    checkpoints = [
        ("Hindi", "./hi_am_model",
         "https://storage.googleapis.com/vakyansh-open-models/conformer_models/hindi/filtered_v1_ssl_2022-07-08_19-43-25/Conformer-CTC-BPE-Large.nemo"),
        ("English", "./en_am_model",
         "https://storage.googleapis.com/vakyansh-open-models/conformer_models/english/2022-09-13_15-50-48/Conformer-CTC-BPE-Large.nemo"),
    ]
    for label, target_dir, url in checkpoints:
        if not os.path.exists(target_dir):
            print(f"Downloading {label} AM")
            download = subprocess.run(
                ["wget", "-P", target_dir, url],
                capture_output=True, text=True,
            )
            if download.returncode != 0:
                raise RuntimeError(f"{label} Model Download Failed: {download.stderr}")
            print(f"Downloaded {label} AM")

    try:
        en_asr_model = nemo_asr.models.EncDecCTCModelBPE.restore_from(
            "./en_am_model/Conformer-CTC-BPE-Large.nemo")
        hi_asr_model = nemo_asr.models.EncDecCTCModelBPE.restore_from(
            "./hi_am_model/Conformer-CTC-BPE-Large.nemo")
    except Exception as e:
        # Raise instead of exit(1): exiting here would kill the whole Streamlit
        # server process; raising lets Streamlit surface the error in the UI.
        raise RuntimeError(f"ERROR Loading Model: {e}") from e
    return en_asr_model, hi_asr_model
en_asr_model, hi_asr_model = get_model()

st.title("💬 Vocalize: Empower Your Voice ")
language = st.selectbox('Enter Your Preferred Language.', ('English', 'Hindi'))
# Streamlit "magic" renders this bare string in the page body.
"""
Record the audio, and get the transcription in real time!
Note: Works best for smaller audios
"""

st.header("Transcribe Your Voice Using Mic")
wav_audio_data = st_audiorec()
if wav_audio_data:
    # Normalize the recording to 16-bit, mono, 16 kHz WAV — the format this app
    # feeds to the ASR models everywhere — and write it to a unique temp path.
    audio_location = "audio_cache/" + str(uuid.uuid4()) + ".wav"
    audio = AudioSegment.from_file(io.BytesIO(wav_audio_data))
    audio = audio.set_sample_width(2).set_channels(1).set_frame_rate(16000)
    audio.export(audio_location, format="wav")

    # Same transcribe call either way; only the model differs by language.
    model = hi_asr_model if language == "Hindi" else en_asr_model
    with st.spinner():
        text = model.transcribe([audio_location], logprobs=False)[0]
    st.write("Transcription:")
    st.write(text)
st.header("Transcribe Files")
uploaded_file = st.file_uploader("Upload Your Recording", disabled=False)
if uploaded_file is not None:
    # Convert the upload to 16-bit, mono, 16 kHz WAV — matching what the mic
    # path produces — instead of writing the raw bytes with a ".wav" name.
    # pydub sniffs the actual container/codec, so non-WAV uploads work too.
    audio_location = "audio_cache/" + str(uuid.uuid4()) + ".wav"
    audio = AudioSegment.from_file(io.BytesIO(uploaded_file.getvalue()))
    audio = audio.set_sample_width(2).set_channels(1).set_frame_rate(16000)
    audio.export(audio_location, format="wav")

    # NOTE(review): button label says "Translation" but this performs
    # transcription — kept byte-identical; confirm intent before renaming.
    is_translate = st.button("Click Me For Translation")
    if is_translate:
        model = hi_asr_model if language == "Hindi" else en_asr_model
        with st.spinner():
            text = model.transcribe([audio_location], logprobs=False)[0]
        st.write(text)