# Hugging Face Space metadata (scraped page header, kept as comments):
# weidexu's picture
# Update app.py
# fe4ba68
# raw / history / blame — 3.96 kB
import gradio as gr
from TTS.api import TTS
tts1 = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=False)
tts2 = TTS("tts_models/zh-CN/baker/tacotron2-DDC-GST")
import os
import openai
import torch
import torchaudio
from speechbrain.pretrained import SpectralMaskEnhancement
enhance_model = SpectralMaskEnhancement.from_hparams(
source="speechbrain/metricgan-plus-voicebank",
savedir="pretrained_models/metricgan-plus-voicebank",
#run_opts={"device":"cuda"},
)
mes = [
{"role": "system", "content": "You are my personal assistant. Try to be helpful."}
]
def chatgpt(apikey, result):
openai.api_key = apikey
messages = mes
# chatgpt
content = result
messages.append({"role": "user", "content": content})
completion = openai.ChatCompletion.create(
model = "gpt-3.5-turbo",
messages = messages
)
chat_response = completion.choices[0].message.content
messages.append({"role": "assistant", "content": chat_response})
return chat_response
def english(text_en, upload, VoiceMicrophone):
if upload is not None:
tts1.tts_to_file(text_en, speaker_wav = upload, language="en", file_path="output.wav")
else:
tts1.tts_to_file(text_en, speaker_wav = VoiceMicrophone, language="en", file_path="output.wav")
noisy = enhance_model.load_audio(
"output.wav"
).unsqueeze(0)
enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
return "enhanced.wav"
def chinese(text_cn, upload1, VoiceMicrophone1):
if upload1 is not None:
tts2.tts_with_vc_to_file(
text_cn + "。",
speaker_wav=upload1,
file_path="ouptut1.wav"
)
else:
tts2.tts_with_vc_to_file(
text_cn + "。",
speaker_wav=VoiceMicrophone1,
file_path="ouptut1.wav"
)
return "ouptut1.wav"
block = gr.Blocks()
with block:
with gr.Group():
gr.Markdown(
""" # <center>Talk to AI</center>
"""
)
with gr.Box():
with gr.Row().style(mobile_collapse=False, equal_height=True):
inp1 = gr.Textbox(label='请输入您的Openai-API-Key', type = "password")
inp2 = gr.Textbox(label='说点什么吧(中英皆可)')
btn = gr.Button("开始对话吧")
texts1 = gr.Textbox(lines=3, label="ChatGPT的回答")
btn.click(chatgpt, [inp1, inp2], [texts1])
with gr.Box():
with gr.Row().style(mobile_collapse=False, equal_height=True):
inp3 = texts1
inp4 = gr.Audio(source="upload", label = "请上传您喜欢的声音(wav/mp3文件)", type="filepath")
inp5 = gr.Audio(source="microphone", type="filepath", label = '请用麦克风上传您喜欢的声音,与文件上传二选一即可')
btn1 = gr.Button("用喜欢的声音听一听吧(中文)")
out1 = gr.Audio(label="合成的专属声音(中文)")
btn1.click(chinese, [inp3, inp4, inp5], [out1])
with gr.Box():
with gr.Row().style(mobile_collapse=False, equal_height=True):
btn2 = gr.Button("用喜欢的声音听一听吧(英文)")
out2 = gr.Audio(label="合成的专属声音(英文)")
btn2.click(english, [inp3, inp4, inp5], [out2])
gr.Markdown(
""" ### <center>仅供学习交流使用</center>
### <center>Powered by [ChatGPT](https://chat.openai.com/).</center>
"""
)
gr.HTML('''
<div class="footer">
<p>
</p>
<p>
</p>
</div>
''')
block.launch(show_error=True)