# Spaces: Running  (appears to be hosting-page status text captured with this file; not program code)
import os
import tempfile

import torch
import edge_tts
import gradio as gr
import anyio

import se_extractor
from api import BaseSpeakerTTS, ToneColorConverter
from tts_voice import tts_order_voice

# Checkpoint directories for the English / Chinese base speakers and the
# tone color converter.
ckpt_base_en = "checkpoints/checkpoints/base_speakers/EN"
ckpt_base_zh = "checkpoints/checkpoints/base_speakers/ZH"
ckpt_converter_en = "checkpoints/checkpoints/converter"

# Everything runs on CPU here; 'cuda:0' was the alternative noted in the
# original source.
device = "cpu"

# English base speaker.
base_speaker_tts = BaseSpeakerTTS(
    f"{ckpt_base_en}/config.json",
    device=device,
)
base_speaker_tts.load_ckpt(f"{ckpt_base_en}/checkpoint.pth")

# Tone color converter, shared by every handler in this file.
tone_color_converter = ToneColorConverter(
    f"{ckpt_converter_en}/config.json",
    device=device,
)
tone_color_converter.load_ckpt(f"{ckpt_converter_en}/checkpoint.pth")

# Chinese base speaker.
base_speaker_tts_zh = BaseSpeakerTTS(
    f"{ckpt_base_zh}/config.json",
    device=device,
)
base_speaker_tts_zh.load_ckpt(f"{ckpt_base_zh}/checkpoint.pth")
def vc_en(text, audio_ref, style_mode):
    """Synthesize English speech for ``text`` and convert it toward ``audio_ref``.

    The text is first rendered by ``base_speaker_tts`` into an intermediate
    WAV file, which ``tone_color_converter`` then converts using an embedding
    extracted from the reference recording.

    Args:
        text: English text handed to ``base_speaker_tts.tts``.
        audio_ref: Path of the reference recording (the UI passes a filepath).
        style_mode: ``"default"`` or another style name; forwarded to the TTS
            call as ``speaker``.

    Returns:
        Path of the converted WAV file (a fresh temporary file per call).

    Raises:
        gr.Error: If ``text`` is blank or no reference recording was supplied.
    """
    # Fail early with a readable UI message instead of an opaque error from
    # deep inside the TTS / extraction code.
    if not text or not text.strip():
        raise gr.Error("请输入您想转换的英文文本")
    if not audio_ref:
        raise gr.Error("请上传您喜欢的语音文件")

    # The two modes differ only in the source embedding file and the speed;
    # the speaker name is simply ``style_mode`` in both cases.
    if style_mode == "default":
        se_file, speed = "en_default_se.pth", 1.0
    else:
        se_file, speed = "en_style_se.pth", 0.9

    # map_location lets a tensor that was saved from another device be
    # loaded on this host's ``device``.
    source_se = torch.load(f"{ckpt_base_en}/{se_file}", map_location=device).to(device)

    target_se, _ = se_extractor.get_se(
        audio_ref,
        tone_color_converter,
        target_dir='processed',
        vad=True,
    )

    # Per-request files: fixed names such as "tmp.wav" / "output.wav" are
    # shared by every handler, so overlapping requests would overwrite each
    # other's audio.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as src_file:
        src_path = src_file.name
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as out_file:
        save_path = out_file.name

    try:
        # Run the base speaker TTS.
        base_speaker_tts.tts(
            text,
            src_path,
            speaker=style_mode,
            language='English',
            speed=speed,
        )
        # Run the tone color converter.
        tone_color_converter.convert(
            audio_src_path=src_path,
            src_se=source_se,
            tgt_se=target_se,
            output_path=save_path,
            message="@MyShell",
        )
    except Exception:
        # Nothing useful was produced; do not leave the placeholder behind.
        if os.path.exists(save_path):
            os.remove(save_path)
        raise
    finally:
        # The intermediate TTS audio is never needed after conversion.
        if os.path.exists(src_path):
            os.remove(src_path)

    return save_path
def vc_zh(text, audio_ref):
    """Synthesize Chinese speech for ``text`` and convert it toward ``audio_ref``.

    The text is rendered by ``base_speaker_tts_zh`` into an intermediate WAV
    file, which ``tone_color_converter`` then converts using an embedding
    extracted from the reference recording.

    Args:
        text: Chinese text handed to ``base_speaker_tts_zh.tts``.
        audio_ref: Path of the reference recording (the UI passes a filepath).

    Returns:
        Path of the converted WAV file (a fresh temporary file per call).

    Raises:
        gr.Error: If ``text`` is blank or no reference recording was supplied.
    """
    # Fail early with a readable UI message instead of an opaque error from
    # deep inside the TTS / extraction code.
    if not text or not text.strip():
        raise gr.Error("请输入您想转换的中文文本")
    if not audio_ref:
        raise gr.Error("请上传您喜欢的语音文件")

    # map_location lets a tensor that was saved from another device be
    # loaded on this host's ``device``.
    source_se = torch.load(
        f"{ckpt_base_zh}/zh_default_se.pth",
        map_location=device,
    ).to(device)

    # Per-request files: fixed names such as "tmp.wav" / "output.wav" are
    # shared by every handler, so overlapping requests would overwrite each
    # other's audio.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as src_file:
        src_path = src_file.name
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as out_file:
        save_path = out_file.name

    try:
        # Run the base speaker TTS.
        base_speaker_tts_zh.tts(
            text,
            src_path,
            speaker='default',
            language='Chinese',
            speed=1.0,
        )
        target_se, _ = se_extractor.get_se(
            audio_ref,
            tone_color_converter,
            target_dir='processed',
            vad=True,
        )
        # Run the tone color converter.
        tone_color_converter.convert(
            audio_src_path=src_path,
            src_se=source_se,
            tgt_se=target_se,
            output_path=save_path,
            message="@MyShell",
        )
    except Exception:
        # Nothing useful was produced; do not leave the placeholder behind.
        if os.path.exists(save_path):
            os.remove(save_path)
        raise
    finally:
        # The intermediate TTS audio is never needed after conversion.
        if os.path.exists(src_path):
            os.remove(src_path)

    return save_path
# Lookup table used by the multilingual tab: dropdown entry -> edge-tts voice.
language_dict = tts_order_voice

# Embedding of the bundled base recording, extracted once at start-up.
# text_to_speech_edge passes it to the converter as ``src_se``.
base_speaker = "base_audio.mp3"
source_se, audio_name = se_extractor.get_se(
    base_speaker,
    tone_color_converter,
    vad=True,
)
async def text_to_speech_edge(text, audio_ref, language_code):
    """Render ``text`` with edge-tts and convert it toward ``audio_ref``.

    The speech is saved by ``edge_tts`` into a temporary MP3, which
    ``tone_color_converter`` converts using the module-level ``source_se`` as
    the source embedding and an embedding extracted from the reference
    recording as the target.

    Args:
        text: Text to speak, in the language of the selected voice.
        audio_ref: Path of the reference recording (the UI passes a filepath).
        language_code: Key into ``language_dict`` selecting the edge-tts voice.

    Returns:
        Path of the converted WAV file (a fresh temporary file per call).

    Raises:
        gr.Error: If ``text`` is blank or no reference recording was supplied.
        KeyError: If ``language_code`` is not a key of ``language_dict``.
    """
    # Fail early with a readable UI message instead of an opaque error from
    # deep inside the TTS / extraction code.
    if not text or not text.strip():
        raise gr.Error("请输入您想转换的任意语言文本")
    if not audio_ref:
        raise gr.Error("请上传您喜欢的语音文件")

    voice = language_dict[language_code]
    communicate = edge_tts.Communicate(text, voice)

    # Reserve per-request file names and close the handles before anything
    # writes to them. A fixed "output.wav" is shared by every handler, so
    # overlapping requests would overwrite each other's audio.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as out_file:
        save_path = out_file.name

    try:
        await communicate.save(tmp_path)

        # NOTE: the two calls below are plain synchronous calls made from
        # inside this coroutine.
        target_se, _ = se_extractor.get_se(
            audio_ref,
            tone_color_converter,
            target_dir='processed',
            vad=True,
        )
        # Run the tone color converter.
        tone_color_converter.convert(
            audio_src_path=tmp_path,
            src_se=source_se,
            tgt_se=target_se,
            output_path=save_path,
            message="@MyShell",
        )
    except Exception:
        # Nothing useful was produced; do not leave the placeholder behind.
        if os.path.exists(save_path):
            os.remove(save_path)
        raise
    finally:
        # The MP3 was created with delete=False, so it must be removed
        # explicitly or one file leaks per request.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    return save_path
# Gradio front end: three tabs, each wiring a text box and a reference
# recording to one of the handlers defined above.
app = gr.Blocks()

# Voice names for the multilingual tab, computed once. The default is the
# 16th entry; fall back to the first entry (or no preselection) so that a
# shorter voice table does not raise IndexError at start-up.
_voice_choices = list(language_dict.keys())
if len(_voice_choices) > 15:
    _default_voice = _voice_choices[15]
elif _voice_choices:
    _default_voice = _voice_choices[0]
else:
    _default_voice = None

with app:
    gr.Markdown("# <center>🥳💕🎶 OpenVoice 3秒语音情感真实复刻</center>")
    gr.Markdown("## <center>🌟 只需3秒语音,一键复刻说话语气及情感,喜怒哀乐、应有尽有! </center>")
    gr.Markdown("### <center>🌊 更多精彩应用,敬请关注[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>")

    # Tab 1: English synthesis with a selectable style.
    with gr.Tab("💕语音情感合成"):
        with gr.Row():
            with gr.Column():
                inp1 = gr.Textbox(lines=3, label="请输入您想转换的英文文本")
                inp2 = gr.Audio(label="请上传您喜欢的语音文件", type="filepath")
                inp3 = gr.Dropdown(
                    label="请选择一种语音情感",
                    info="🙂default😊friendly🤫whispering😄cheerful😱terrified😡angry😢sad",
                    choices=["default", "friendly", "whispering", "cheerful", "terrified", "angry", "sad"],
                    value="default",
                )
                btn1 = gr.Button("开始语音情感真实复刻吧!", variant="primary")
            with gr.Column():
                out1 = gr.Audio(label="为您合成的专属语音", type="filepath")
        btn1.click(vc_en, [inp1, inp2, inp3], out1)

    # Tab 2: Chinese synthesis.
    with gr.Tab("🎶中文声音复刻"):
        with gr.Row():
            with gr.Column():
                inp_zh_1 = gr.Textbox(lines=3, label="请输入您想转换的中文文本")
                inp_zh_2 = gr.Audio(label="请上传您喜欢的语音文件", type="filepath")
                btn_zh = gr.Button("开始语音情感真实复刻吧!", variant="primary")
            with gr.Column():
                out_zh = gr.Audio(label="为您合成的专属语音", type="filepath")
        btn_zh.click(vc_zh, [inp_zh_1, inp_zh_2], out_zh)

    # Tab 3: any voice listed in language_dict, rendered through edge-tts.
    with gr.Tab("🌟多语言声音复刻"):
        with gr.Row():
            with gr.Column():
                inp4 = gr.Textbox(lines=3, label="请输入您想转换的任意语言文本")
                inp5 = gr.Audio(label="请上传您喜欢的语音文件", type="filepath")
                inp6 = gr.Dropdown(
                    choices=_voice_choices,
                    value=_default_voice,
                    label="请选择文本对应的语言及说话人",
                )
                btn2 = gr.Button("开始语音情感真实复刻吧!", variant="primary")
            with gr.Column():
                out2 = gr.Audio(label="为您合成的专属语音", type="filepath")
        btn2.click(text_to_speech_edge, [inp4, inp5, inp6], out2)

    # NOTE(review): nothing in the visible code uses an OpenAI API key, so the
    # link at the end of this notice looks like a leftover - confirm before
    # removing it.
    gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。Get your OpenAI API Key [here](https://platform.openai.com/api-keys).</center>")
    gr.HTML('''
    <div class="footer">
    <p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
    </p>
    </div>
    ''')

# show_error=True asks Gradio to surface handler exceptions in the browser.
app.launch(show_error=True)