Spaces:
Sleeping
Sleeping
kevinwang676
committed on
Commit
•
4ebcd7e
1
Parent(s):
a321207
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import torch
|
3 |
+
import se_extractor
|
4 |
+
from api import BaseSpeakerTTS, ToneColorConverter
|
5 |
+
|
6 |
+
# Directories holding the English base-speaker and tone-color-converter
# checkpoints (each is expected to contain config.json and checkpoint.pth).
ckpt_base_en = 'checkpoints/checkpoints/base_speakers/EN'
ckpt_converter_en = 'checkpoints/checkpoints/converter'

# Use the first CUDA GPU when one is present; otherwise fall back to CPU so the
# app can still start on CPU-only hosts instead of failing at model creation.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Base text-to-speech model, loaded once at start-up and shared by all requests.
base_speaker_tts = BaseSpeakerTTS(f'{ckpt_base_en}/config.json', device=device)
base_speaker_tts.load_ckpt(f'{ckpt_base_en}/checkpoint.pth')

# Tone color converter, also loaded once and shared by all requests.
tone_color_converter = ToneColorConverter(f'{ckpt_converter_en}/config.json', device=device)
tone_color_converter.load_ckpt(f'{ckpt_converter_en}/checkpoint.pth')
|
15 |
+
|
16 |
+
from tts_voice import tts_order_voice
|
17 |
+
import edge_tts
|
18 |
+
import gradio as gr
|
19 |
+
import tempfile
|
20 |
+
import anyio
|
21 |
+
|
22 |
+
def vc_en(text, audio_ref, style_mode):
    """Synthesize English speech for *text* and convert it toward *audio_ref*.

    The text is first rendered by ``base_speaker_tts`` into ``tmp.wav`` and
    then passed through ``tone_color_converter`` together with an embedding
    extracted from the reference audio; the result is written to
    ``output.wav``.

    Args:
        text: English text to speak. Must contain non-whitespace characters.
        audio_ref: File path of the reference voice recording.
        style_mode: ``"default"`` or one of the style names accepted by the
            base speaker model (passed through as its ``speaker`` argument).

    Returns:
        The path of the generated audio file (``"output.wav"``).

    Raises:
        ValueError: If *text* is empty/blank or *audio_ref* is missing.
    """
    # Fail early with a readable message instead of an obscure error from
    # deep inside the TTS / embedding-extraction code.
    if not text or not text.strip():
        raise ValueError("No text provided: please enter the text to synthesize.")
    if not audio_ref:
        raise ValueError("No reference audio provided: please upload a voice sample.")

    # The default style and the expressive styles differ only in which source
    # embedding file is loaded and in the speaking speed.
    if style_mode == "default":
        se_file, speed = "en_default_se.pth", 1.0
    else:
        se_file, speed = "en_style_se.pth", 0.9

    # map_location lets an embedding saved from a GPU load on a CPU-only host.
    source_se = torch.load(f'{ckpt_base_en}/{se_file}', map_location=device).to(device)
    target_se, _ = se_extractor.get_se(
        audio_ref,
        tone_color_converter,
        target_dir='processed',
        vad=True,
    )

    save_path = "output.wav"

    # Run the base speaker tts
    src_path = "tmp.wav"
    base_speaker_tts.tts(text, src_path, speaker=style_mode, language='English', speed=speed)

    # Run the tone color converter
    encode_message = "@MyShell"
    tone_color_converter.convert(
        audio_src_path=src_path,
        src_se=source_se,
        tgt_se=target_se,
        output_path=save_path,
        message=encode_message,
    )

    return save_path
|
63 |
+
|
64 |
+
# Lookup table used by text_to_speech_edge to turn the dropdown selection into
# an edge-tts voice name.
language_dict = tts_order_voice

# Embedding extracted once at start-up from the bundled base recording; it is
# passed as the source embedding (src_se) for edge-tts generated audio.
base_speaker = "base_audio.mp3"
source_se, audio_name = se_extractor.get_se(
    base_speaker,
    tone_color_converter,
    vad=True,
)
|
68 |
+
|
69 |
+
async def text_to_speech_edge(text, audio_ref, language_code):
    """Speak *text* with an edge-tts voice and convert it toward *audio_ref*.

    The edge-tts audio is saved to a temporary MP3, run through
    ``tone_color_converter`` using the module-level ``source_se`` as source
    embedding and an embedding extracted from the reference audio as target,
    and the result is written to ``output.wav``.

    Args:
        text: Text to speak. Must contain non-whitespace characters.
        audio_ref: File path of the reference voice recording.
        language_code: Key into ``language_dict`` selecting the edge-tts voice.

    Returns:
        The path of the generated audio file (``"output.wav"``).

    Raises:
        ValueError: If *text* is empty/blank or *audio_ref* is missing.
        KeyError: If *language_code* is not a key of ``language_dict``.
    """
    # Validate before doing any network or model work.
    if not text or not text.strip():
        raise ValueError("No text provided: please enter the text to synthesize.")
    if not audio_ref:
        raise ValueError("No reference audio provided: please upload a voice sample.")

    voice = language_dict[language_code]
    communicate = edge_tts.Communicate(text, voice)

    # Only reserve a unique file name here; the handle is closed right away so
    # that edge-tts can write to the path by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    save_path = "output.wav"
    try:
        await communicate.save(tmp_path)

        target_se, _ = se_extractor.get_se(
            audio_ref,
            tone_color_converter,
            target_dir='processed',
            vad=True,
        )

        # Run the tone color converter
        encode_message = "@MyShell"
        tone_color_converter.convert(
            audio_src_path=tmp_path,
            src_se=source_se,
            tgt_se=target_se,
            output_path=save_path,
            message=encode_message,
        )
    finally:
        # The file was created with delete=False, so nothing else removes it;
        # clean up on success and on failure to avoid leaking one file per call.
        try:
            os.remove(tmp_path)
        except OSError:
            pass

    return save_path
|
91 |
+
|
92 |
+
# Gradio UI: one tab for English emotional synthesis (vc_en) and one tab for
# multilingual cloning through edge-tts (text_to_speech_edge).
app = gr.Blocks()

with app:
    gr.Markdown("# <center>🥳💕🎶 OpenVoice 3秒语音情感真实复刻</center>")
    gr.Markdown("## <center>🌟 只需3秒语音,真实复刻说话语气及情感,无需训练! </center>")
    gr.Markdown("### <center>🌊 更多精彩应用,敬请关注[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>")

    # Tab 1: English text + reference voice + emotion style -> vc_en
    with gr.Tab("💕语音情感合成"):
        with gr.Row():
            with gr.Column():
                inp1 = gr.Textbox(lines=5, label="请输入您想转换的英文文本")
                inp2 = gr.Audio(label="请上传您喜欢的语音文件", type="filepath")
                inp3 = gr.Dropdown(
                    label="请选择一种语音情感",
                    info="🙂default😊friendly🤫whispering😄cheerful😱terrified😡angry😢sad",
                    choices=["default", "friendly", "whispering", "cheerful", "terrified", "angry", "sad"],
                    value="default",
                )

                btn1 = gr.Button("开始语音情感真实复刻吧!", variant="primary")

            with gr.Column():
                out1 = gr.Audio(label="为您合成的专属语音", type="filepath")
        btn1.click(vc_en, [inp1, inp2, inp3], out1)

    # Tab 2: text + reference voice + language -> text_to_speech_edge
    with gr.Tab("🌟多语言声音复刻"):
        # Build the choice list once. The historical default is the entry at
        # index 15; fall back to the first entry (or no preselection) when the
        # voice table is shorter, instead of raising IndexError at start-up.
        _language_choices = list(language_dict)
        if len(_language_choices) > 15:
            _default_language = _language_choices[15]
        elif _language_choices:
            _default_language = _language_choices[0]
        else:
            _default_language = None

        with gr.Row():
            with gr.Column():
                inp4 = gr.Textbox(lines=5, label="请输入您想转换的英文文本")
                inp5 = gr.Audio(label="请上传您喜欢的语音文件", type="filepath")
                inp6 = gr.Dropdown(
                    choices=_language_choices,
                    value=_default_language,
                    label="请选择文本对应的语言",
                )

                btn2 = gr.Button("开始语音情感真实复刻吧!", variant="primary")

            with gr.Column():
                out2 = gr.Audio(label="为您合成的专属语音", type="filepath")
        btn2.click(text_to_speech_edge, [inp4, inp5, inp6], out2)

    gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。Get your OpenAI API Key [here](https://platform.openai.com/api-keys).</center>")
    gr.HTML('''
    <div class="footer">
    <p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
    </p>
    </div>
    ''')

# show_error=True surfaces exceptions raised by the handlers in the browser.
app.launch(show_error=True)
|