"""Vocalize: a Streamlit app that transcribes English/Hindi speech with NeMo Conformer-CTC models.

The app offers two input paths — live mic recording and file upload — and
runs the language-appropriate Vakyansh acoustic model on the captured audio.
"""
import io
import os
import subprocess
import uuid

import streamlit as st
from st_audiorec import st_audiorec  # pip install streamlit-audiorec
import nemo.collections.asr as nemo_asr
from pydub import AudioSegment

# Vakyansh open-model checkpoints: local directory -> download URL.
_MODELS = {
    "English": ("./en_am_model",
                "https://storage.googleapis.com/vakyansh-open-models/conformer_models/english/2022-09-13_15-50-48/Conformer-CTC-BPE-Large.nemo"),
    "Hindi": ("./hi_am_model",
              "https://storage.googleapis.com/vakyansh-open-models/conformer_models/hindi/filtered_v1_ssl_2022-07-08_19-43-25/Conformer-CTC-BPE-Large.nemo"),
}


def _download_model(name, dest_dir, url):
    """Fetch a .nemo checkpoint with wget unless it is already on disk.

    Raises:
        Exception: if wget exits non-zero (stderr included in the message).
    """
    if os.path.exists(dest_dir):
        return
    print(f"Downloading {name} AM")
    download = subprocess.run(["wget", "-P", dest_dir, url],
                              capture_output=True, text=True)
    if download.returncode != 0:
        raise Exception(f"{name} Model Download Failed: {download.stderr}")
    print(f"Downloaded {name} AM")


@st.cache_resource
def get_model():
    """Download (if needed) and load both acoustic models exactly once per server.

    Returns:
        tuple: (english_model, hindi_model) as EncDecCTCModelBPE instances.
    """
    # exist_ok replaces the old bare try/except-pass around makedirs.
    os.makedirs("audio_cache", exist_ok=True)

    _download_model("Hindi", *_MODELS["Hindi"])
    _download_model("English", *_MODELS["English"])

    try:
        en_asr_model = nemo_asr.models.EncDecCTCModelBPE.restore_from(
            "./en_am_model/Conformer-CTC-BPE-Large.nemo")
        hi_asr_model = nemo_asr.models.EncDecCTCModelBPE.restore_from(
            "./hi_am_model/Conformer-CTC-BPE-Large.nemo")
    except Exception as e:
        # Re-raise instead of exit(1): exit() would kill the whole Streamlit
        # server; raising lets Streamlit surface the error in the browser.
        print("ERROR Loading Model... ", e)
        raise
    return en_asr_model, hi_asr_model


en_asr_model, hi_asr_model = get_model()

st.title("💬 Vocalize: Empower Your Voice ")

language = st.selectbox('Enter Your Preferred Language.', ('English', 'Hindi'))

st.write("Record the audio, and get the transcription in real time!\n\n"
         "Note: Works best for smaller audios")


def _cache_path():
    """Return a fresh unique wav path inside audio_cache/."""
    return os.path.join("audio_cache", f"{uuid.uuid4()}.wav")


def _transcribe(audio_path):
    """Run the model matching the selected language on one wav file."""
    model = hi_asr_model if language == "Hindi" else en_asr_model
    return model.transcribe([audio_path], logprobs=False)[0]


st.header("Transcribe Your Voice Using Mic")
wav_audio_data = st_audiorec()
if wav_audio_data:
    audio_location = _cache_path()
    # Normalize to 16-bit / mono / 16 kHz — the format the Conformer
    # checkpoints were trained on.
    audio = AudioSegment.from_file(io.BytesIO(wav_audio_data))
    audio = audio.set_sample_width(2).set_channels(1).set_frame_rate(16000)
    audio.export(audio_location, format="wav")

    text = _transcribe(audio_location)
    print(text)
    st.write("Transcription:")
    st.write(text)

st.header("Transcribe Files")
uploaded_file = st.file_uploader("Upload Your Recording", disabled=False)
if uploaded_file is not None:
    # Persist the upload so NeMo can read it from a file path.
    audio_location = _cache_path()
    with open(audio_location, "wb") as f:
        f.write(uploaded_file.getvalue())

    # Trigger transcription only on explicit button press.
    if st.button("Click Me For Translation"):
        with st.spinner():
            text = _transcribe(audio_location)
        print(text)
        st.write(text)