"""Gradio voice-cloning chat app.

Flow: the user's text goes to gpt-3.5-turbo; the reply can then be
synthesized in the user's own (uploaded or recorded) voice — English via
the multilingual YourTTS cloning model (then denoised with MetricGAN+),
Chinese via a Tacotron2 model followed by voice conversion.
"""

import os

import gradio as gr
import openai
import torch
import torchaudio
from speechbrain.pretrained import SpectralMaskEnhancement
from TTS.api import TTS

# English voice cloning: multilingual YourTTS synthesizes directly from a
# reference speaker wav.
tts1 = TTS(
    model_name="tts_models/multilingual/multi-dataset/your_tts",
    progress_bar=False,
    gpu=False,
)
# Chinese TTS (no native cloning); cloning is achieved afterwards with
# voice conversion (tts_with_vc_to_file).
tts2 = TTS("tts_models/zh-CN/baker/tacotron2-DDC-GST")

# Speech-enhancement model used to denoise the English synthesis output.
enhance_model = SpectralMaskEnhancement.from_hparams(
    source="speechbrain/metricgan-plus-voicebank",
    savedir="pretrained_models/metricgan-plus-voicebank",
    # run_opts={"device": "cuda"},  # uncomment to run enhancement on GPU
)

# Conversation history shared at module level.
# NOTE(review): every visitor of a shared deployment appends to the same
# list, so histories mix across users and grow without bound — acceptable
# for a single-user demo, worth isolating per-session otherwise.
mes = [
    {"role": "system", "content": "You are my personal assistant. Try to be helpful."}
]


def chatgpt(apikey, result):
    """Send *result* to gpt-3.5-turbo and return the assistant's reply.

    Both the user turn and the reply are appended to the shared history
    ``mes`` so follow-up calls keep conversational context.

    :param apikey: OpenAI API key supplied by the user.
    :param result: the user's message text.
    :return: the assistant reply text.
    """
    openai.api_key = apikey
    messages = mes  # alias, not a copy: appends below mutate the shared history
    messages.append({"role": "user", "content": result})
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    chat_response = completion.choices[0].message.content
    messages.append({"role": "assistant", "content": chat_response})
    return chat_response


def english(text_en, upload, VoiceMicrophone):
    """Synthesize *text_en* in the reference speaker's voice (English).

    The uploaded file takes priority over the microphone recording.
    The raw synthesis is denoised with MetricGAN+ before being returned.

    :param text_en: text to speak.
    :param upload: path to an uploaded reference wav/mp3, or None.
    :param VoiceMicrophone: path to a microphone recording, or None.
    :return: path to the enhanced synthesized wav ("enhanced.wav").
    """
    speaker_wav = upload if upload is not None else VoiceMicrophone
    tts1.tts_to_file(
        text_en,
        speaker_wav=speaker_wav,
        language="en",
        file_path="output.wav",
    )
    # Enhance: load as a (1, samples) batch; lengths=1.0 means the full
    # signal is valid (no padding).
    noisy = enhance_model.load_audio("output.wav").unsqueeze(0)
    enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
    torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
    return "enhanced.wav"


def chinese(text_cn, upload1, VoiceMicrophone1):
    """Synthesize *text_cn* in the reference speaker's voice (Chinese).

    Uses TTS + voice conversion since the Chinese model has no native
    speaker cloning. The uploaded file takes priority over the microphone
    recording. A full stop is appended to help the model terminate cleanly.

    :param text_cn: text to speak.
    :param upload1: path to an uploaded reference wav/mp3, or None.
    :param VoiceMicrophone1: path to a microphone recording, or None.
    :return: path to the synthesized wav ("output1.wav").
    """
    speaker_wav = upload1 if upload1 is not None else VoiceMicrophone1
    tts2.tts_with_vc_to_file(
        text_cn + "。",
        speaker_wav=speaker_wav,
        file_path="output1.wav",  # fixed typo: was "ouptut1.wav"
    )
    return "output1.wav"


# ---------------------------------------------------------------------------
# UI layout
# ---------------------------------------------------------------------------
block = gr.Blocks()
with block:
    with gr.Group():
        gr.Markdown(
            """ #
Talk to AI
"""
        )
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                inp1 = gr.Textbox(label='请输入您的Openai-API-Key', type="password")
                inp2 = gr.Textbox(label='说点什么吧(中英皆可)')
                btn = gr.Button("开始对话吧")
            texts1 = gr.Textbox(lines=3, label="ChatGPT的回答")
            btn.click(chatgpt, [inp1, inp2], [texts1])
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                # Reuse the ChatGPT answer textbox as the TTS input.
                inp3 = texts1
                inp4 = gr.Audio(
                    source="upload",
                    label="请上传您喜欢的声音(wav/mp3文件)",
                    type="filepath",
                )
                inp5 = gr.Audio(
                    source="microphone",
                    type="filepath",
                    label='请用麦克风上传您喜欢的声音,与文件上传二选一即可',
                )
                btn1 = gr.Button("用喜欢的声音听一听吧(中文)")
            out1 = gr.Audio(label="合成的专属声音(中文)")
            btn1.click(chinese, [inp3, inp4, inp5], [out1])
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                btn2 = gr.Button("用喜欢的声音听一听吧(英文)")
            out2 = gr.Audio(label="合成的专属声音(英文)")
            btn2.click(english, [inp3, inp4, inp5], [out2])
        gr.Markdown(
            """ ###
仅供学习交流使用
###
Powered by [ChatGPT](https://chat.openai.com/).
"""
        )
        gr.HTML(''' ''')

block.launch(show_error=True)