kevinwang676 committed on
Commit
4ebcd7e
1 Parent(s): a321207

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -0
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import se_extractor
4
+ from api import BaseSpeakerTTS, ToneColorConverter
5
+
6
# Directories holding the config/checkpoint pairs loaded below.
ckpt_base_en = 'checkpoints/checkpoints/base_speakers/EN'
ckpt_converter_en = 'checkpoints/checkpoints/converter'

# Prefer the first CUDA device, but fall back to the CPU so the app can still
# start on hosts without a GPU instead of failing while loading the models.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Base English speaker model: built from its config, then weights are loaded.
base_speaker_tts = BaseSpeakerTTS(f'{ckpt_base_en}/config.json', device=device)
base_speaker_tts.load_ckpt(f'{ckpt_base_en}/checkpoint.pth')

# Tone color converter: built from its config, then weights are loaded.
tone_color_converter = ToneColorConverter(f'{ckpt_converter_en}/config.json', device=device)
tone_color_converter.load_ckpt(f'{ckpt_converter_en}/checkpoint.pth')
15
+
16
+ from tts_voice import tts_order_voice
17
+ import edge_tts
18
+ import gradio as gr
19
+ import tempfile
20
+ import anyio
21
+
22
def vc_en(text, audio_ref, style_mode):
    """Speak *text* with the base English model, then convert it to the reference voice.

    Args:
        text: Text handed to ``base_speaker_tts.tts``.
        audio_ref: File path of the reference recording; handed to
            ``se_extractor.get_se`` to obtain the target embedding.
        style_mode: ``"default"`` selects ``en_default_se.pth`` and speed 1.0;
            any other value selects ``en_style_se.pth`` and speed 0.9. The
            value is also passed to the TTS model as ``speaker``.

    Returns:
        Path of the converted WAV file.

    Raises:
        ValueError: If *text* is blank or *audio_ref* is missing.
    """
    if not text or not text.strip():
        raise ValueError("Please enter some text to synthesize.")
    if not audio_ref:
        raise ValueError("Please upload a reference audio file.")

    # The two modes differ only in the source embedding file and the speed.
    if style_mode == "default":
        se_file, speed = 'en_default_se.pth', 1.0
    else:
        se_file, speed = 'en_style_se.pth', 0.9

    source_se = torch.load(f'{ckpt_base_en}/{se_file}').to(device)
    target_se, _ = se_extractor.get_se(
        audio_ref, tone_color_converter, target_dir='processed', vad=True
    )

    # Use per-request files: fixed names such as "tmp.wav"/"output.wav" get
    # overwritten when two requests are processed at the same time.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as src_file:
        src_path = src_file.name
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as out_file:
        save_path = out_file.name

    try:
        # Run the base speaker tts
        base_speaker_tts.tts(text, src_path, speaker=style_mode, language='English', speed=speed)

        # Run the tone color converter
        tone_color_converter.convert(
            audio_src_path=src_path,
            src_se=source_se,
            tgt_se=target_se,
            output_path=save_path,
            message="@MyShell",
        )
    except Exception:
        # Do not leave an unusable output file behind on failure.
        if os.path.exists(save_path):
            os.remove(save_path)
        raise
    finally:
        # The intermediate TTS audio is never needed after conversion.
        if os.path.exists(src_path):
            os.remove(src_path)

    return save_path
63
+
64
# Mapping consulted by the multi-language tab: its keys populate the language
# dropdown and its values are passed to edge_tts as the voice.
language_dict = tts_order_voice

# Embedding extracted once at start-up from the bundled base recording; it is
# used as the source embedding when converting edge_tts output.
base_speaker = "base_audio.mp3"
source_se, audio_name = se_extractor.get_se(
    base_speaker,
    tone_color_converter,
    vad=True,
)
68
+
69
async def text_to_speech_edge(text, audio_ref, language_code):
    """Synthesize *text* with edge_tts, then convert it to the reference voice.

    Args:
        text: Text handed to ``edge_tts.Communicate``.
        audio_ref: File path of the reference recording; handed to
            ``se_extractor.get_se`` to obtain the target embedding.
        language_code: Key into ``language_dict`` selecting the edge_tts voice.

    Returns:
        Path of the converted WAV file.

    Raises:
        ValueError: If *text* is blank or *audio_ref* is missing.
        KeyError: If *language_code* is not a key of ``language_dict``.
    """
    if not text or not text.strip():
        raise ValueError("Please enter some text to synthesize.")
    if not audio_ref:
        raise ValueError("Please upload a reference audio file.")

    voice = language_dict[language_code]
    communicate = edge_tts.Communicate(text, voice)

    # Reserve per-request file names; a shared "output.wav" would be
    # overwritten when two requests are processed at the same time.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as out_file:
        save_path = out_file.name

    try:
        await communicate.save(tmp_path)

        target_se, _ = se_extractor.get_se(
            audio_ref, tone_color_converter, target_dir='processed', vad=True
        )

        # Run the tone color converter; source_se is the module-level
        # embedding computed from the base recording.
        tone_color_converter.convert(
            audio_src_path=tmp_path,
            src_se=source_se,
            tgt_se=target_se,
            output_path=save_path,
            message="@MyShell",
        )
    except Exception:
        # Do not leave an unusable output file behind on failure.
        if os.path.exists(save_path):
            os.remove(save_path)
        raise
    finally:
        # The file was created with delete=False, so remove it explicitly.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    return save_path
91
+
92
# Build the language list once and pick the default defensively: the original
# hard-coded index 15 raises IndexError if the voice table is shorter.
language_choices = list(language_dict)
if len(language_choices) > 15:
    default_language = language_choices[15]
elif language_choices:
    default_language = language_choices[0]
else:
    default_language = None

app = gr.Blocks()

# NOTE(review): the nesting below is reconstructed; the placement of the final
# notice/footer at the Blocks level should be confirmed against the real file.
with app:
    gr.Markdown("# <center>🥳💕🎶 OpenVoice 3秒语音情感真实复刻</center>")
    gr.Markdown("## <center>🌟 只需3秒语音,真实复刻说话语气及情感,无需训练! </center>")
    gr.Markdown("### <center>🌊 更多精彩应用,敬请关注[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>")

    # Tab 1: English TTS with a selectable style, converted to the uploaded voice.
    with gr.Tab("💕语音情感合成"):
        with gr.Row():
            with gr.Column():
                inp1 = gr.Textbox(lines=5, label="请输入您想转换的英文文本")
                inp2 = gr.Audio(label="请上传您喜欢的语音文件", type="filepath")
                inp3 = gr.Dropdown(
                    label="请选择一种语音情感",
                    info="🙂default😊friendly🤫whispering😄cheerful😱terrified😡angry😢sad",
                    choices=["default", "friendly", "whispering", "cheerful", "terrified", "angry", "sad"],
                    value="default",
                )

                btn1 = gr.Button("开始语音情感真实复刻吧!", variant="primary")

            with gr.Column():
                out1 = gr.Audio(label="为您合成的专属语音", type="filepath")
        btn1.click(vc_en, [inp1, inp2, inp3], out1)

    # Tab 2: edge_tts speech in the chosen language, converted to the uploaded voice.
    with gr.Tab("🌟多语言声音复刻"):
        with gr.Row():
            with gr.Column():
                # This tab accepts any language listed in the dropdown, so the
                # label no longer asks for English text specifically.
                inp4 = gr.Textbox(lines=5, label="请输入您想转换的文本")
                inp5 = gr.Audio(label="请上传您喜欢的语音文件", type="filepath")
                inp6 = gr.Dropdown(
                    choices=language_choices,
                    value=default_language,
                    label="请选择文本对应的语言",
                )

                btn2 = gr.Button("开始语音情感真实复刻吧!", variant="primary")

            with gr.Column():
                out2 = gr.Audio(label="为您合成的专属语音", type="filepath")
        btn2.click(text_to_speech_edge, [inp4, inp5, inp6], out2)

    gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。Get your OpenAI API Key [here](https://platform.openai.com/api-keys).</center>")
    gr.HTML('''
    <div class="footer">
    <p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
    </p>
    </div>
    ''')

# show_error=True surfaces exceptions raised by the handlers in the browser.
app.launch(show_error=True)