File size: 2,117 Bytes
df2accb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import gradio as gr


# Singers whose display label is written in Latin script only.
_LATIN_LABEL_SINGERS = {
    "Adele": "vocalist_l1_Adele",
    "Beyonce": "vocalist_l1_Beyonce",
    "Bruno Mars": "vocalist_l1_BrunoMars",
    "John Mayer": "vocalist_l1_JohnMayer",
    "Michael Jackson": "vocalist_l1_MichaelJackson",
    "Taylor Swift": "vocalist_l1_TaylorSwift",
}

# Singers whose display label carries both a romanized and a Chinese name.
_BILINGUAL_LABEL_SINGERS = {
    "Jacky Cheung 张学友": "vocalist_l1_张学友",
    "Jian Li 李健": "vocalist_l1_李健",
    "Feng Wang 汪峰": "vocalist_l1_汪峰",
    "Faye Wong 王菲": "vocalist_l1_王菲",
    "Yijie Shi 石倚洁": "vocalist_l1_石倚洁",
    "Tsai Chin 蔡琴": "vocalist_l1_蔡琴",
    "Ying Na 那英": "vocalist_l1_那英",
    "Eason Chan 陈奕迅": "vocalist_l1_陈奕迅",
    "David Tao 陶喆": "vocalist_l1_陶喆",
}

# Display label -> singer identifier. The keys are what the UI offers as
# target-singer choices; merging keeps the Latin-label group first, followed
# by the bilingual group, each in its listed order.
SUPPORTED_TARGET_SINGERS = {
    **_LATIN_LABEL_SINGERS,
    **_BILINGUAL_LABEL_SINGERS,
}


def svc_inference(
    source_audio,
    target_singer,
    diffusion_steps=1000,
    key_shift_mode="auto",
    key_shift_num=0,
):
    """Stub callback for singing voice conversion.

    No conversion is implemented in this function: every argument is
    accepted and ignored, and the result is always ``None``.

    Args:
        source_audio: Unused here. Presumably the audio to convert; the
            expected format is not visible in this file.
        target_singer: Unused here. Presumably the chosen singer label.
        diffusion_steps: Unused here. Defaults to 1000.
        key_shift_mode: Unused here. Defaults to ``"auto"``.
            NOTE(review): the UI radio offers "Auto Shift" / "Key Shift",
            which differs from this default; confirm the intended values
            when the body is implemented.
        key_shift_num: Unused here. Defaults to 0.

    Returns:
        None.
    """
    return None


# Source song, either uploaded as a file or recorded from the microphone.
_source_audio_input = gr.Audio(
    label="Upload (or record) a song you want to listen",
    sources=["upload", "microphone"],
)

# One choice per display label in SUPPORTED_TARGET_SINGERS.
_target_singer_input = gr.Radio(
    label="Target Singer",
    choices=list(SUPPORTED_TARGET_SINGERS),
    value="Jian Li 李健",
)

# Integer step count from 1 to 1000, preset to the maximum.
_diffusion_steps_input = gr.Slider(
    minimum=1,
    maximum=1000,
    step=1,
    value=1000,
    label="Diffusion Inference Steps",
    info="As the step number increases, the synthesis quality will be better while the inference speed will be lower",
)

# Selects between automatic pitch shifting and a user-specified key shift.
_key_shift_mode_input = gr.Radio(
    label="Pitch Shift Control",
    choices=["Auto Shift", "Key Shift"],
    value="Auto Shift",
    info='If you want to control the specific pitch shift value, you need to choose "Key Shift"',
)

# Whole-semitone transposition from -6 to 6, preset to no shift.
_key_shift_num_input = gr.Slider(
    minimum=-6,
    maximum=6,
    step=1,
    value=0,
    label="Key Shift Values",
    info='How many semitones you want to transpose.	This parameter will work only if you choose "Key Shift"',
)

# Listed in the same order as the parameters of svc_inference.
demo_inputs = [
    _source_audio_input,
    _target_singer_input,
    _diffusion_steps_input,
    _key_shift_mode_input,
    _key_shift_num_input,
]

# Output component of the interface: one audio widget with an empty label.
demo_outputs = gr.Audio(label="")


# Bind the callback, the input components and the output component into a
# single Gradio interface titled "Amphion Singing Voice Conversion".
demo = gr.Interface(
    fn=svc_inference,
    inputs=demo_inputs,
    outputs=demo_outputs,
    title="Amphion Singing Voice Conversion",
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    # NOTE(review): show_api=False presumably hides Gradio's API usage page;
    # confirm against the installed Gradio version.
    demo.launch(show_api=False)