leo-emovits

Build error

App Files Files Community

leo-emovits / app.py

chilge

Update app.py

2d38eef about 2 years ago

raw

history blame contribute delete

5.66 kB

	import gradio as gr
	import torch
	import commons
	import utils
	from models import SynthesizerTrn
	from text.symbols import symbols
	from text import text_to_sequence
	import numpy as np


	def get_text(text, hps):
	    """Turn raw text into a ``torch.LongTensor`` of symbol ids.

	    Args:
	        text: Text to convert; passed to ``text_to_sequence``.
	        hps: Hyper-parameter object; ``hps.data.text_cleaners`` and
	            ``hps.data.add_blank`` are read.

	    Returns:
	        A ``torch.LongTensor`` built from the id sequence. When
	        ``hps.data.add_blank`` is truthy the sequence is first passed
	        through ``commons.intersperse(..., 0)`` (presumably inserting the
	        blank id 0 between symbols -- confirm against ``commons``).
	    """
	    data_cfg = hps.data
	    token_ids = text_to_sequence(text, data_cfg.text_cleaners)
	    if not data_cfg.add_blank:
	        return torch.LongTensor(token_ids)
	    return torch.LongTensor(commons.intersperse(token_ids, 0))
	# Hyper-parameters for the model, read from the JSON config.
	hps = utils.get_hparams_from_file("./configs/leo.json")

	# Positional arguments of the synthesizer, all derived from the config:
	# symbol count, filter_length // 2 + 1, segment_size // hop_length.
	_net_g_args = (
	    len(symbols),
	    hps.data.filter_length // 2 + 1,
	    hps.train.segment_size // hps.data.hop_length,
	)
	net_g = SynthesizerTrn(*_net_g_args, n_speakers=hps.data.n_speakers, **hps.model)
	_ = net_g.eval()

	# Load the generator weights into net_g (no optimizer is passed).
	_ = utils.load_checkpoint("./logs/leo/G_4000.pth", net_g, None)

	# Emotion embeddings, indexed by row in tts().
	all_emotions = np.load("all_emotions.npy")

	# Preset emotion label -> row index into all_emotions.
	emotion_dict = {
	    label: row
	    for row, label in enumerate(("小声(目前没区分）", "激动", "平静1", "平静2"))
	}
	import random
	def tts(txt, emotion, ns, nsw, ls):
	    """Synthesize audio for ``txt`` using the selected emotion embedding.

	    Args:
	        txt: Input text; converted to a symbol-id tensor by ``get_text``.
	        emotion: Selects the emotion embedding handed to the model:

	            * an ``int`` -- row index into ``all_emotions``;
	            * ``"random"`` -- a fresh ``torch.randn([1, 1024])`` vector;
	            * ``"random_sample"`` -- a uniformly chosen row of
	              ``all_emotions``;
	            * a string ending in ``"wav"`` -- passed to
	              ``emotion_extract.extract_wav``;
	            * any other value -- looked up as a key of ``emotion_dict``.
	        ns: Forwarded to ``net_g.infer`` as ``noise_scale``.
	        nsw: Forwarded to ``net_g.infer`` as ``noise_scale_w``.
	        ls: Forwarded to ``net_g.infer`` as ``length_scale``.

	    Returns:
	        ``(audio, randsample)``. ``audio`` is a NumPy array taken from the
	        first output of ``net_g.infer``; ``randsample`` is the row index
	        that was drawn for ``"random_sample"`` and ``None`` otherwise.

	    Raises:
	        KeyError: If ``emotion`` is an unknown preset name.
	        IndexError: If an integer ``emotion`` is outside ``all_emotions``.
	    """
	    stn_tst = get_text(txt, hps)
	    randsample = None
	    with torch.no_grad():
	        x_tst = stn_tst.unsqueeze(0)
	        x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
	        sid = torch.LongTensor([0])

	        # bool is a subclass of int; exclude it so True/False are never
	        # treated as row indices.
	        if isinstance(emotion, int) and not isinstance(emotion, bool):
	            emo = torch.FloatTensor(all_emotions[emotion]).unsqueeze(0)
	        elif emotion == "random":
	            emo = torch.randn([1, 1024])
	        elif emotion == "random_sample":
	            # randrange excludes the upper bound. randint(0, n) includes n
	            # and could index one row past the end of all_emotions.
	            randsample = random.randrange(all_emotions.shape[0])
	            emo = torch.FloatTensor(all_emotions[randsample]).unsqueeze(0)
	        elif emotion.endswith("wav"):
	            # Imported lazily: only needed for reference-audio input.
	            import emotion_extract
	            emo = torch.FloatTensor(emotion_extract.extract_wav(emotion))
	        else:
	            emo = torch.FloatTensor(all_emotions[emotion_dict[emotion]]).unsqueeze(0)

	        audio = net_g.infer(
	            x_tst,
	            x_tst_lengths,
	            sid=sid,
	            noise_scale=ns,
	            noise_scale_w=nsw,
	            length_scale=ls,
	            emo=emo,
	        )[0][0, 0].data.float().numpy()
	    return audio, randsample


	def tts1(text, emotion, ns, nsw, ls):
	    """Synthesize ``text`` with a given emotion and report a status message.

	    Args:
	        text: Text to synthesize; rejected when longer than 150 characters.
	        emotion: Emotion selector forwarded unchanged to ``tts``.
	        ns, nsw, ls: Synthesis parameters forwarded unchanged to ``tts``.

	    Returns:
	        ``("Success", (sampling_rate, audio))`` on success, or
	        ``("Error: Text is too long", None)`` when the text exceeds the limit.
	    """
	    if len(text) <= 150:
	        waveform, _ = tts(text, emotion, ns, nsw, ls)
	        return "Success", (hps.data.sampling_rate, waveform)
	    return "Error: Text is too long", None

	def tts2(text, ns, nsw, ls):
	    """Synthesize ``text`` with a randomly drawn emotion sample.

	    Args:
	        text: Text to synthesize; rejected when longer than 150 characters.
	        ns, nsw, ls: Synthesis parameters forwarded unchanged to ``tts``.

	    Returns:
	        ``(sample_id_as_str, (sampling_rate, audio))`` on success, where the
	        first item is ``str()`` of the sample index reported by ``tts``; or
	        ``("Error: Text is too long", None)`` when the text exceeds the limit.
	    """
	    too_long = len(text) > 150
	    if too_long:
	        return "Error: Text is too long", None

	    waveform, sample_id = tts(text, "random_sample", ns, nsw, ls)
	    sample_label = str(sample_id)
	    return sample_label, (hps.data.sampling_rate, waveform)

	def tts3(text, sample, ns, nsw, ls):
	    """Synthesize ``text`` using an emotion sample id.

	    Args:
	        text: Text to synthesize; rejected when longer than 150 characters.
	        sample: Emotion sample id. Must be convertible with ``int()`` and,
	            if it is a float, must hold a whole number (3.0 is accepted,
	            3.7 is rejected rather than silently truncated).
	        ns, nsw, ls: Synthesis parameters forwarded unchanged to ``tts``.

	    Returns:
	        ``("Success", (sampling_rate, audio))`` on success;
	        ``("Error: Text is too long", None)`` when the text exceeds the limit;
	        ``("输入参数不为整数或其他错误", None)`` when ``sample`` is not an
	        integer or synthesis fails.
	    """
	    if len(text) > 150:
	        return "Error: Text is too long", None

	    failure = ("输入参数不为整数或其他错误", None)

	    try:
	        index = int(sample)
	    except (TypeError, ValueError, OverflowError):
	        return failure
	    if isinstance(sample, float) and sample != index:
	        return failure

	    try:
	        audio, _ = tts(text, index, ns, nsw, ls)
	        return "Success", (hps.data.sampling_rate, audio)
	    except Exception:
	        # Keep the best-effort message for the UI, but leave a trace in the
	        # server log. KeyboardInterrupt/SystemExit are no longer swallowed.
	        import traceback
	        traceback.print_exc()
	        return failure
	def _synthesis_sliders():
	    """Create the noise_scale, noise_scale_w and length_scale sliders.

	    Called once inside each tab's ``with`` block, at the position where the
	    three sliders belong in that tab.

	    Returns:
	        The three ``gr.Slider`` components, in creation order.
	    """
	    noise_scale = gr.Slider(
	        label="noise_scale(控制感情变化程度)",
	        minimum=0.1,
	        maximum=1.0,
	        step=0.1,
	        value=0.6,
	        interactive=True,
	    )
	    noise_scale_w = gr.Slider(
	        label="noise_scale_w(控制音素发音长度)",
	        minimum=0.1,
	        maximum=1.0,
	        step=0.1,
	        value=0.668,
	        interactive=True,
	    )
	    length_scale = gr.Slider(
	        label="length_scale(控制整体语速)",
	        minimum=0.1,
	        maximum=2.0,
	        step=0.1,
	        value=1.2,
	        interactive=True,
	    )
	    return noise_scale, noise_scale_w, length_scale


	with gr.Blocks() as app:
	    with gr.Tabs():
	        # Tab 1: synthesize with one of the preset emotions (tts1).
	        with gr.TabItem("使用预制情感合成"):
	            tts_input1 = gr.TextArea(label="日语文本", value="こんにちは。")
	            tts_input2 = gr.Dropdown(label="情感", choices=list(emotion_dict), value="平静1")
	            ns, nsw, ls = _synthesis_sliders()
	            tts_submit = gr.Button("合成音频", variant="primary")
	            tts_output1 = gr.Textbox(label="Message")
	            tts_output2 = gr.Audio(label="Output")
	            tts_submit.click(
	                fn=tts1,
	                inputs=[tts_input1, tts_input2, ns, nsw, ls],
	                outputs=[tts_output1, tts_output2],
	            )

	        # Tab 2: draw a random emotion sample and show its id (tts2).
	        with gr.TabItem("随机抽取训练集样本作为情感参数"):
	            tts_input1 = gr.TextArea(label="日语文本", value="こんにちは。")
	            ns, nsw, ls = _synthesis_sliders()
	            tts_submit = gr.Button("合成音频", variant="primary")
	            tts_output1 = gr.Textbox(label="随机样本id（可用于第三个tab中合成）")
	            tts_output2 = gr.Audio(label="Output")
	            tts_submit.click(
	                fn=tts2,
	                inputs=[tts_input1, ns, nsw, ls],
	                outputs=[tts_output1, tts_output2],
	            )

	        # Tab 3: synthesize with an explicit emotion sample id (tts3).
	        with gr.TabItem("使用情感样本id作为情感参数"):
	            tts_input1 = gr.TextArea(label="日语文本", value="こんにちは。")
	            tts_input2 = gr.Number(label="情感样本id", value=0)
	            ns, nsw, ls = _synthesis_sliders()
	            tts_submit = gr.Button("合成音频", variant="primary")
	            tts_output1 = gr.Textbox(label="Message")
	            tts_output2 = gr.Audio(label="Output")
	            tts_submit.click(
	                fn=tts3,
	                inputs=[tts_input1, tts_input2, ns, nsw, ls],
	                outputs=[tts_output1, tts_output2],
	            )

	        # Tab 4: reference-audio input; only a placeholder text area so far.
	        with gr.TabItem("使用参考音频作为情感参数"):
	            tts_input1 = gr.TextArea(label="text", value="暂未实现")

	app.launch()