import gradio as gr from TTS.api import TTS tts1 = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=False) tts2 = TTS("tts_models/zh-CN/baker/tacotron2-DDC-GST") import os import openai import torch import torchaudio from speechbrain.pretrained import SpectralMaskEnhancement enhance_model = SpectralMaskEnhancement.from_hparams( source="speechbrain/metricgan-plus-voicebank", savedir="pretrained_models/metricgan-plus-voicebank", #run_opts={"device":"cuda"}, ) mes = [ {"role": "system", "content": "You are my personal assistant. Try to be helpful."} ] def chatgpt(apikey, result): openai.api_key = apikey messages = mes # chatgpt content = result messages.append({"role": "user", "content": content}) completion = openai.ChatCompletion.create( model = "gpt-3.5-turbo", messages = messages ) chat_response = completion.choices[0].message.content messages.append({"role": "assistant", "content": chat_response}) return chat_response def english(text_en, upload, VoiceMicrophone): if upload is not None: tts1.tts_to_file(text_en, speaker_wav = upload, language="en", file_path="output.wav") else: tts1.tts_to_file(text_en, speaker_wav = VoiceMicrophone, language="en", file_path="output.wav") noisy = enhance_model.load_audio( "output.wav" ).unsqueeze(0) enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.])) torchaudio.save("enhanced.wav", enhanced.cpu(), 16000) return "enhanced.wav" def chinese(text_cn, upload1, VoiceMicrophone1): if upload1 is not None: tts2.tts_with_vc_to_file( text_cn + "。", speaker_wav=upload1, file_path="ouptut1.wav" ) else: tts2.tts_with_vc_to_file( text_cn + "。", speaker_wav=VoiceMicrophone1, file_path="ouptut1.wav" ) return "ouptut1.wav" block = gr.Blocks() with block: with gr.Group(): gr.Markdown( """ #