import streamlit as st
from Inference import Inferencer


# Built-in demo scores offered through the "Example N" buttons.
# Each entry is (button label, duration, lyric, note); the three score fields
# are comma-separated strings in the same format the text inputs accept.
_EXAMPLES = (
    (
        'Example 1',
        '0.52,0.17,0.35,0.35,0.35,0.35,0.70,0.35,0.35,0.70,0.35,0.35,0.70,0.52,0.17,0.35,0.35,0.35,0.35,0.70,0.35,0.35,0.35,0.35,1.39',
        '떴,다,떴,다,비,행,기,날,아,라,날,아,라,높,이,높,이,날,아,라,우,리,비,행,기',
        '76,74,72,74,76,76,76,74,74,74,76,79,79,76,74,72,74,76,76,76,74,74,76,74,72',
    ),
    (
        'Example 2',
        '0.53,0.52,0.50,0.57,0.58,0.46,0.48,0.50,0.37,0.13,0.43,0.21,0.57,0.43,0.49,1.44,0.26,0.49,0.14,0.13,0.57,0.26,0.06,0.15,0.63,0.26,0.51,0.20,0.48,0.72,0.22',
        '만,나,고,,난,외,로,움,을,,알,았,어,내,겐,,관,심,조,,차,,없,,다,는,걸,,알,면,서',
        '76,78,79,0,71,74,72,71,72,0,71,69,69,71,74,0,79,78,79,0,71,0,74,0,74,72,72,0,71,71,69',
    ),
    (
        'Example 3',
        '0.33,0.16,0.33,0.49,0.33,0.16,0.81,0.33,0.16,0.16,0.33,0.16,0.49,0.16,0.82,0.33,0.16,0.33,0.49,0.33,0.16,0.33,0.49,0.33,0.33,0.16,0.33,1.47,0.33,0.16,0.33,0.49,0.33,0.16,0.81,0.33,0.16,0.16,0.33,0.16,0.49,0.16,0.82,0.33,0.16,0.33,0.16,0.33,0.49,0.16,0.33,0.33,0.33,0.33,0.16,0.33,0.82',
        '마,음,울,적,한,날,에,,거,리,를,걸,어,보,고,향,기,로,운,칵,테,일,에,취,해,도,보,고,한,편,의,시,가,있,는,,전,시,회,장,도,가,고,밤,새,도,,록,그,리,움,에,편,질,쓰,고,파',
        '80,80,80,87,85,84,82,0,84,84,84,85,84,79,79,77,77,77,80,80,78,77,75,77,80,79,80,82,80,80,80,87,85,84,82,0,84,84,84,85,84,79,79,77,77,77,79,80,80,77,75,75,77,80,79,82,80',
    ),
)


def _rerun():
    """Restart the script run, using whichever rerun API this Streamlit has."""
    # `st.rerun` supersedes the deprecated `st.experimental_rerun`; fall back
    # to the old name so older installations keep working.
    rerun = getattr(st, 'rerun', None) or st.experimental_rerun
    rerun()


def _parse_score(duration, lyric, note, key_adjustment):
    """Turn the three comma-separated text fields into inferencer inputs.

    Args:
        duration: Comma-separated numbers, one per note.
        lyric: Comma-separated lyric tokens; empty tokens are kept as-is.
        note: Comma-separated integer note values; 0 is left untouched by
            the key adjustment.
        key_adjustment: Integer offset added to every non-zero note.

    Returns:
        A ``(durations, lyrics, notes)`` tuple of equally long lists.

    Raises:
        ValueError: If a duration or note cannot be parsed, or if the three
            fields do not contain the same number of entries.
    """
    try:
        durations = [float(token) for token in duration.strip().split(',')]
    except ValueError as error:
        raise ValueError(
            'Duration must be a comma-separated list of numbers.'
        ) from error

    lyrics = lyric.strip().split(',')

    try:
        pitches = [int(token) for token in note.strip().split(',')]
    except ValueError as error:
        raise ValueError(
            'Note must be a comma-separated list of integers.'
        ) from error

    # NOTE(review): assumes the inferencer expects one lyric and one note per
    # duration, as in all bundled examples -- confirm against Inference.py.
    if not len(durations) == len(lyrics) == len(pitches):
        raise ValueError(
            'Duration, Lyric, and Note must have the same number of entries '
            f'(got {len(durations)}, {len(lyrics)}, and {len(pitches)}).'
        )

    # A note value of 0 is never transposed; every other note is shifted.
    notes = [
        pitch + key_adjustment if pitch != 0 else pitch
        for pitch in pitches
    ]
    return durations, lyrics, notes


def app_diffsingerkr():
    """Render the DiffSinger-KR demo page.

    Keeps the score fields and the ``Inferencer`` instance in
    ``st.session_state``, offers three preset scores, and on "Generate!"
    runs the inferencer on the entered score and plays the returned audio.
    Invalid or incomplete input is reported on the page instead of raising.
    """
    for field in ('diffsingerkr_duration', 'diffsingerkr_lyric', 'diffsingerkr_note'):
        if field not in st.session_state:
            st.session_state[field] = ''

    # Build the inferencer only once per session; later reruns reuse it.
    if 'inferencer' not in st.session_state:
        st.session_state.inferencer = Inferencer(
            hp_path='Hyper_Parameters.yaml',
            checkpoint_path='Checkpoint/S_200000.pt',
            batch_size=1
        )

    st.title('DiffSinger-KR')
    st.markdown('* This code is an implementation of DiffSinger for Korean.')
    st.markdown('* When music score which is note, duration, and lyric information are entered, singing voices are synthesized accordingly.')
    st.markdown('* Due to the range of the trained dataset, the supported notes are between 65 and 89.')
    st.markdown('* Please refer to the [here](https://github.com/CODEJIN/DiffSingerKR) for the source code for training the model.')
    st.markdown('''---''')

    # Placeholder whose header is rewritten as generation progresses.
    status_indicator = st.empty()
    status_indicator.header('Insert the music!')

    st.markdown('''---''')

    # Four columns are requested; the last one is intentionally left unused.
    example_columns = st.columns(4)
    for column, (label, ex_duration, ex_lyric, ex_note) in zip(example_columns, _EXAMPLES):
        if column.button(label):
            st.session_state.diffsingerkr_duration = ex_duration
            st.session_state.diffsingerkr_lyric = ex_lyric
            st.session_state.diffsingerkr_note = ex_note
            # Rerun so the text inputs below pick up the new defaults.
            _rerun()

    st.markdown('''---''')

    duration = st.text_input('Duration', value=st.session_state.diffsingerkr_duration)
    lyric = st.text_input('Lyric', value=st.session_state.diffsingerkr_lyric)
    note = st.text_input('Note', value=st.session_state.diffsingerkr_note)
    singer = 'CSD'
    genre = 'Children'
    key_adjustment = st.select_slider(
        label='Key adjustment',
        options=list(range(-6, 7)),
        value=0
    )

    if not st.button("Generate!"):
        return

    if duration == '' or lyric == '' or note == '':
        st.warning('Please fill in Duration, Lyric, and Note before generating.')
        return

    try:
        durations, lyrics, notes = _parse_score(duration, lyric, note, key_adjustment)
    except ValueError as error:
        st.error(str(error))
        return

    status_indicator.header('Generating...')
    # The inferencer takes batched inputs; a single-item batch is sent and
    # the first (only) result is used.
    audio = st.session_state.inferencer.Inference_Epoch(
        message_times_list=[durations],
        lyrics=[lyrics],
        notes=[notes],
        singers=[singer],
        genres=[genre]
    )[0]
    st.audio(
        audio,
        format="audio/wav",
        start_time=0,
        sample_rate=st.session_state.inferencer.hp.Sound.Sample_Rate
    )
    status_indicator.header('Done.')


if __name__ == '__main__':
    app_diffsingerkr()