File size: 2,589 Bytes
5c60553
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import streamlit as st
# Inline CSS injected into the page to hide the footer element.
hide_menu_style = """

<style>

footer {visibility: hidden;}

</style>

"""

# Browser-tab title and icon. This stays the first Streamlit call in the script.
st.set_page_config(
    page_icon="static/images/PLAYGROUND_LOGO_REDESIGN_IMAGE.png",
    page_title="SpeechT5",
)

# The CSS is raw HTML, so it is passed with unsafe_allow_html enabled.
st.markdown(hide_menu_style, unsafe_allow_html=True)

import glob
from src.model import Model, dataset_dict

# First run of a session: seed every session-state slot this script uses with
# None. "model_name" doubles as the marker that seeding has already happened.
if "model_name" not in st.session_state:
    for _slot in ("model_name", "audio", "wav_file"):
        st.session_state[_slot] = None
    
# Sidebar form collecting the text to synthesise, the model checkpoint and the
# reference voice. On submit, the result of `Model.inference` is stored in
# `st.session_state.audio`.
with st.sidebar.form("my_form"):

    text = st.text_input("Your input: ")
    model_name = st.selectbox(
        label="Model: ",
        options=[
            "truong-xuan-linh/speecht5-vietnamese-commonvoice",
            "truong-xuan-linh/speecht5-vietnamese-voiceclone-lsvsc",
            "truong-xuan-linh/speecht5-vietnamese-hlpcvoice",
            "truong-xuan-linh/speecht5-vietnamese-vstnvoice",
            "truong-xuan-linh/speecht5-vietnamese-kcbnvoice",
            "truong-xuan-linh/speecht5-irmvivoice",
            "truong-xuan-linh/speecht5-vietnamese-voiceclone",
            "truong-xuan-linh/speecht5-multilingual-voiceclone-speechbrain",
            "truong-xuan-linh/speecht5-vietnamese-voiceclone-v3",
            "truong-xuan-linh/speecht5-multilingual-voiceclone-pynote",
            "truong-xuan-linh/speecht5-multilingual-voiceclone-speechbrain-nonverbal",
        ],
    )

    # Reference voice: either a key of `dataset_dict` or a URL typed by the user.
    speaker_id = st.selectbox("source voice", options=list(dataset_dict.keys()))
    speaker_url = st.text_input("speaker url", value="")

    # (Re)build the model only when the selection differs from the cached one.
    if st.session_state.model_name != model_name:
        # Construct the model BEFORE recording its name. If construction
        # raises, `model_name` stays stale and the next rerun retries, instead
        # of skipping the load and leaving `model` missing or pointing at the
        # previously selected checkpoint.
        st.session_state.model = Model(model_name=model_name)
        st.session_state.model_name = model_name
        st.session_state.speaker_id = speaker_id

    # Every form must have a submit button.
    submitted = st.form_submit_button("Submit")
    if submitted:
        st.session_state.audio = st.session_state.model.inference(
            text=text,
            speaker_id=speaker_id,
            speaker_url=speaker_url,
        )
        
# Placeholder in the main page area that will host the audio player.
audio_holder = st.empty()

# `st.session_state.audio` is still None until the form has been submitted.
# Passing None together with `sample_rate` makes Streamlit warn that
# `sample_rate` is ignored for non-NumPy data, so render the player only once
# there is audio to play.
if st.session_state.audio is not None:
    audio_holder.audio(st.session_state.audio, sample_rate=16000)