# Hugging Face Space metadata (scraped page header, kept as comments):
# weidexu's picture
# Update app.py
# fe4ba68
# raw / history / blame — 3.96 kB
import gradio as gr
from TTS.api import TTS
tts1 = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=False)
tts2 = TTS("tts_models/zh-CN/baker/tacotron2-DDC-GST")
import os
import openai
import torch
import torchaudio
from speechbrain.pretrained import SpectralMaskEnhancement
enhance_model = SpectralMaskEnhancement.from_hparams(
source="speechbrain/metricgan-plus-voicebank",
savedir="pretrained_models/metricgan-plus-voicebank",
#run_opts={"device":"cuda"},
)
mes = [
{"role": "system", "content": "You are my personal assistant. Try to be helpful."}
]
def chatgpt(apikey, result):
openai.api_key = apikey
messages = mes
# chatgpt
content = result
messages.append({"role": "user", "content": content})
completion = openai.ChatCompletion.create(
model = "gpt-3.5-turbo",
messages = messages
)
chat_response = completion.choices[0].message.content
messages.append({"role": "assistant", "content": chat_response})
return chat_response
def english(text_en, upload, VoiceMicrophone):
if upload is not None:
tts1.tts_to_file(text_en, speaker_wav = upload, language="en", file_path="output.wav")
else:
tts1.tts_to_file(text_en, speaker_wav = VoiceMicrophone, language="en", file_path="output.wav")
noisy = enhance_model.load_audio(
"output.wav"
).unsqueeze(0)
enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
return "enhanced.wav"
def chinese(text_cn, upload1, VoiceMicrophone1):
if upload1 is not None:
tts2.tts_with_vc_to_file(
text_cn + "。",
speaker_wav=upload1,
file_path="ouptut1.wav"
)
else:
tts2.tts_with_vc_to_file(
text_cn + "。",
speaker_wav=VoiceMicrophone1,
file_path="ouptut1.wav"
)
return "ouptut1.wav"
block = gr.Blocks()
with block:
with gr.Group():
gr.Markdown(
""" # <center>Talk to AI</center>
"""
)
with gr.Box():
with gr.Row().style(mobile_collapse=False, equal_height=True):
inp1 = gr.Textbox(label='请输入您的Openai-API-Key', type = "password")
inp2 = gr.Textbox(label='说点什么吧(中英皆可)')
btn = gr.Button("开始对话吧")
texts1 = gr.Textbox(lines=3, label="ChatGPT的回答")
btn.click(chatgpt, [inp1, inp2], [texts1])
with gr.Box():
with gr.Row().style(mobile_collapse=False, equal_height=True):
inp3 = texts1
inp4 = gr.Audio(source="upload", label = "请上传您喜欢的声音(wav/mp3文件)", type="filepath")
inp5 = gr.Audio(source="microphone", type="filepath", label = '请用麦克风上传您喜欢的声音,与文件上传二选一即可')
btn1 = gr.Button("用喜欢的声音听一听吧(中文)")
out1 = gr.Audio(label="合成的专属声音(中文)")
btn1.click(chinese, [inp3, inp4, inp5], [out1])
with gr.Box():
with gr.Row().style(mobile_collapse=False, equal_height=True):
btn2 = gr.Button("用喜欢的声音听一听吧(英文)")
out2 = gr.Audio(label="合成的专属声音(英文)")
btn2.click(english, [inp3, inp4, inp5], [out2])
gr.Markdown(
""" ### <center>仅供学习交流使用</center>
### <center>Powered by [ChatGPT](https://chat.openai.com/).</center>
"""
)
gr.HTML('''
<div class="footer">
<p>
</p>
<p>
</p>
</div>
''')
block.launch(show_error=True)