File size: 4,738 Bytes
67d041f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import streamlit as st

from Inference import Inferencer

# Built-in demo scores as (duration, lyric, note) comma-separated strings.
# Every score has the same number of entries in all three fields. '<X>' in the
# lyric is paired with note 0 in every example below.
# The lyrics are Hangul syllables; the file must be saved as UTF-8.
_EXAMPLES = (
    (
        '0.52,0.17,0.35,0.35,0.35,0.35,0.70,0.35,0.35,0.70,0.35,0.35,0.70,0.52,0.17,0.35,0.35,0.35,0.35,0.70,0.35,0.35,0.35,0.35,1.39',
        '떴,다,떴,다,비,행,기,날,아,라,날,아,라,높,이,높,이,날,아,라,우,리,비,행,기',
        '76,74,72,74,76,76,76,74,74,74,76,79,79,76,74,72,74,76,76,76,74,74,76,74,72',
    ),
    (
        '0.53,0.52,0.50,0.57,0.58,0.46,0.48,0.50,0.37,0.13,0.43,0.21,0.57,0.43,0.49,1.44,0.26,0.49,0.14,0.13,0.57,0.26,0.06,0.15,0.63,0.26,0.51,0.20,0.48,0.72,0.22',
        '만,나,고,<X>,난,외,로,움,을,<X>,알,았,어,내,겐,<X>,관,심,조,<X>,차,<X>,없,<X>,다,는,걸,<X>,알,면,서',
        '76,78,79,0,71,74,72,71,72,0,71,69,69,71,74,0,79,78,79,0,71,0,74,0,74,72,72,0,71,71,69',
    ),
    (
        '0.33,0.16,0.33,0.49,0.33,0.16,0.81,0.33,0.16,0.16,0.33,0.16,0.49,0.16,0.82,0.33,0.16,0.33,0.49,0.33,0.16,0.33,0.49,0.33,0.33,0.16,0.33,1.47,0.33,0.16,0.33,0.49,0.33,0.16,0.81,0.33,0.16,0.16,0.33,0.16,0.49,0.16,0.82,0.33,0.16,0.33,0.16,0.33,0.49,0.16,0.33,0.33,0.33,0.33,0.16,0.33,0.82',
        '마,음,울,적,한,날,에,<X>,거,리,를,걸,어,보,고,향,기,로,운,칵,테,일,에,취,해,도,보,고,한,편,의,시,가,있,는,<X>,전,시,회,장,도,가,고,밤,새,도,<X>,록,그,리,움,에,편,질,쓰,고,파',
        '80,80,80,87,85,84,82,0,84,84,84,85,84,79,79,77,77,77,80,80,78,77,75,77,80,79,80,82,80,80,80,87,85,84,82,0,84,84,84,85,84,79,79,77,77,77,79,80,80,77,75,75,77,80,79,82,80',
    ),
)


def _rerun():
    """Restart the current Streamlit script run.

    Uses ``st.rerun`` when the installed Streamlit provides it and falls back
    to the older ``st.experimental_rerun`` otherwise, so the page works on both
    old and new Streamlit releases.
    """
    rerun = getattr(st, 'rerun', None) or st.experimental_rerun
    rerun()


def _parse_score(duration, lyric, note, key_adjustment):
    """Turn the three comma-separated text fields into lists for inference.

    Args:
        duration: Comma-separated numbers, one per score entry.
        lyric: Comma-separated lyric tokens, one per score entry.
        note: Comma-separated integers, one per score entry.
        key_adjustment: Integer added to every non-zero note.

    Returns:
        A ``(durations, lyrics, notes)`` tuple of equally long lists holding
        floats, strings and ints respectively.

    Raises:
        ValueError: If the fields have different entry counts or a duration or
            note entry is not a valid number. The message is meant to be shown
            to the user as-is.
    """
    # Strip each token so that "0.5, 0.5" and "a, b" are accepted as typed.
    duration_tokens = [token.strip() for token in duration.split(',')]
    lyric_tokens = [token.strip() for token in lyric.split(',')]
    note_tokens = [token.strip() for token in note.split(',')]

    # The three fields describe the same sequence of score entries, so a count
    # mismatch is reported instead of being handed on to the model.
    if not len(duration_tokens) == len(lyric_tokens) == len(note_tokens):
        raise ValueError(
            'Duration, Lyric, and Note must have the same number of entries '
            f'(got {len(duration_tokens)}, {len(lyric_tokens)}, and {len(note_tokens)}).'
        )

    try:
        durations = [float(token) for token in duration_tokens]
    except ValueError:
        raise ValueError('Duration must be a comma-separated list of numbers.') from None

    try:
        pitches = [int(token) for token in note_tokens]
    except ValueError:
        raise ValueError('Note must be a comma-separated list of integers.') from None

    # Note 0 is left untouched by the key adjustment; every other note is shifted.
    notes = [pitch + key_adjustment if pitch != 0 else pitch for pitch in pitches]
    return durations, lyric_tokens, notes


def app_diffsingerkr():
    """Render the DiffSinger-KR demo page and synthesize audio on request.

    The page keeps the three score fields and a single ``Inferencer`` in
    ``st.session_state``, so they persist across Streamlit reruns of the same
    session. Pressing an example button loads a built-in score into the
    fields. Pressing "Generate!" validates the fields, runs the inferencer on
    the one entered score and plays the resulting audio.
    """
    for state_key in ('diffsingerkr_duration', 'diffsingerkr_lyric', 'diffsingerkr_note'):
        if state_key not in st.session_state:
            st.session_state[state_key] = ''
    if 'inferencer' not in st.session_state:
        # Created once per session and reused on every rerun.
        st.session_state.inferencer = Inferencer(
            hp_path='Hyper_Parameters.yaml',
            checkpoint_path='Checkpoint/S_200000.pt',
            batch_size=1,
        )

    st.title('DiffSinger-KR')
    st.markdown('* This code is an implementation of DiffSinger for Korean.')
    st.markdown('* When music score which is note, duration, and lyric information are entered, singing voices are synthesized accordingly.')
    st.markdown('* Due to the range of the trained dataset, the supported notes are between 65 and 89.')
    st.markdown('* Please refer to the [here](https://github.com/CODEJIN/DiffSingerKR) for the source code for training the model.')

    st.markdown('''---''')
    # Placeholder that is overwritten later to report progress.
    status_indicator = st.empty()
    status_indicator.header('Insert the music!')
    st.markdown('''---''')

    # Four columns are created but only the first three carry a button;
    # zip() stops after the last example.
    example_columns = st.columns(4)
    for index, (column, example) in enumerate(zip(example_columns, _EXAMPLES), start=1):
        if column.button(f'Example {index}'):
            example_duration, example_lyric, example_note = example
            st.session_state.diffsingerkr_duration = example_duration
            st.session_state.diffsingerkr_lyric = example_lyric
            st.session_state.diffsingerkr_note = example_note
            # Rerun so the text inputs below pick up the new default values.
            _rerun()

    st.markdown('''---''')
    duration = st.text_input('Duration', value=st.session_state.diffsingerkr_duration)
    lyric = st.text_input('Lyric', value=st.session_state.diffsingerkr_lyric)
    note = st.text_input('Note', value=st.session_state.diffsingerkr_note)
    singer = 'CSD'
    genre = 'Children'
    key_adjustment = st.select_slider(
        label='Key adjustment',
        options=list(range(-6, 7)),
        value=0,
    )

    if not st.button("Generate!"):
        return

    # Whitespace-only fields count as empty.
    if not (duration.strip() and lyric.strip() and note.strip()):
        st.warning('Please fill in Duration, Lyric, and Note before generating.')
        return

    try:
        durations, lyrics, notes = _parse_score(duration, lyric, note, key_adjustment)
    except ValueError as error:
        # Show a readable message instead of a raw traceback for bad input.
        st.error(str(error))
        return

    status_indicator.header('Generating...')
    # The inferencer takes batches; wrap the single score in one-element lists
    # and take the first (only) result.
    audio = st.session_state.inferencer.Inference_Epoch(
        message_times_list=[durations],
        lyrics=[lyrics],
        notes=[notes],
        singers=[singer],
        genres=[genre],
    )[0]

    st.audio(
        audio,
        format="audio/wav",
        start_time=0,
        sample_rate=st.session_state.inferencer.hp.Sound.Sample_Rate,
    )

    status_indicator.header('Done.')

# Script entry point: build the page only when this file is executed directly
# (as it is under `streamlit run`), not when it is imported as a module.
if __name__ == "__main__":
    app_diffsingerkr()