"""Gradio demo: chat with an OpenAI-completed persona and speak the reply via VITS.

The user supplies a message and a persona description; the completion model
produces the persona's reply, which is then synthesised to audio by the
pretrained ``SynthesizerTrn`` checkpoint loaded at import time.
"""
import json
import math
import os
import re
import unicodedata

# The extension must be built before the project modules are imported below.
# NOTE(review): presumably `models` depends on the compiled monotonic_align
# extension -- confirm. The exit status is intentionally ignored (best-effort),
# exactly as in the original script.
os.system('cd monotonic_align && python setup.py build_ext --inplace && cd ..')

import gradio as gr
import IPython.display as ipd
# new imports
import matplotlib.pyplot as plt
import openai
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader

import commons
import utils
from models import SynthesizerTrn
from text import text_to_sequence
from text.symbols import symbols

# Name of the environment variable that must hold the OpenAI API key.
# SECURITY: a key used to be hard-coded in this file; that key is compromised
# and must be revoked. Never commit credentials to source control.
OPENAI_API_KEY_ENV = "OPENAI_API_KEY"

# Speaker label appended to the prompt so the model answers as this character.
CALL_NAME = "亚托克斯"

# Sequence at which the completion is cut off.
STOP_SEQUENCE = "\nYou:"

# Sample rate reported to Gradio together with the synthesised waveform.
# NOTE(review): presumably matches the checkpoint's training sample rate -- confirm.
SAMPLING_RATE = 22050


def get_text(text, hps):
    """Convert *text* into a ``torch.LongTensor`` of symbol ids.

    Args:
        text: Raw input text.
        hps: Hyper-parameter object; ``hps.data.text_cleaners`` and
            ``hps.data.add_blank`` are read.

    Returns:
        A 1-D ``LongTensor`` with the id sequence; when ``hps.data.add_blank``
        is truthy, ``0`` is interspersed via ``commons.intersperse``.
    """
    text_norm = text_to_sequence(text, hps.data.text_cleaners)
    if hps.data.add_blank:
        text_norm = commons.intersperse(text_norm, 0)
    return torch.LongTensor(text_norm)


hps = utils.get_hparams_from_file("configs/biaobei_base.json")
net_g = SynthesizerTrn(
    len(symbols),
    hps.data.filter_length // 2 + 1,
    hps.train.segment_size // hps.data.hop_length,
    **hps.model)
_ = net_g.eval()

_ = utils.load_checkpoint("G_1434000.pth", net_g, None)


def friend_chat(text, tts_input3):
    """Return the persona's reply to *text* using the OpenAI completion API.

    Args:
        text: The user's message. The literal string ``'quit'`` is returned
            unchanged without contacting the API.
        tts_input3: Persona description placed at the start of the prompt.

    Returns:
        The stripped text of the first completion choice, or *text* itself
        when it equals ``'quit'``.

    Raises:
        RuntimeError: If the ``OPENAI_API_KEY`` environment variable is unset
            or empty.
    """
    if text == 'quit':
        return text

    api_key = os.environ.get(OPENAI_API_KEY_ENV)
    if not api_key:
        raise RuntimeError(
            f"Environment variable {OPENAI_API_KEY_ENV} is not set; "
            "it must contain a valid OpenAI API key."
        )
    openai.api_key = api_key

    identity = tts_input3
    start_sequence = f"\n{CALL_NAME}:"
    # NOTE(review): the user turn is appended directly after the persona text,
    # without a "\nYou: " prefix, even though "\nYou:" is the stop sequence.
    # The original code built such a prefix but never used it -- confirm
    # whether the prefix was intended. Kept as-is to preserve the prompt.
    prompt = identity + text + start_sequence  # current prompt

    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0.5,
        max_tokens=1000,
        top_p=1.0,
        frequency_penalty=0.5,
        presence_penalty=0.0,
        stop=[STOP_SEQUENCE],
    )
    return response['choices'][0]['text'].strip()


def sle(text, tts_input3):
    """Fetch the persona's reply and normalise its whitespace for synthesis.

    Newlines in the reply become ``'。'`` and spaces become ``','``.

    Args:
        text: The user's message.
        tts_input3: Persona description forwarded to ``friend_chat``.

    Returns:
        The reply string with the replacements applied.
    """
    reply = friend_chat(text, tts_input3)
    return reply.replace('\n', '。').replace(' ', ',')


def infer(text, tts_input3):
    """Generate the persona's spoken reply to *text*.

    Args:
        text: The user's message.
        tts_input3: Persona description.

    Returns:
        A ``(sampling_rate, audio)`` tuple where ``audio`` is a float NumPy
        array taken from ``net_g.infer`` output.
    """
    stn_tst = get_text(sle(text, tts_input3), hps)
    with torch.no_grad():
        x_tst = stn_tst.unsqueeze(0)  # add a leading dimension of size 1
        x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
        audio = net_g.infer(
            x_tst,
            x_tst_lengths,
            noise_scale=.667,
            noise_scale_w=0.8,
            length_scale=1,
        )[0][0, 0].data.cpu().float().numpy()
    return (SAMPLING_RATE, audio)


app = gr.Blocks()
with app:
    with gr.Tabs():
        with gr.TabItem("Basic"):
            tts_input1 = gr.TextArea(label="输入你想跟剑魔说的话", value="我是暮光星灵佐伊,我要三天之内杀了你")
            tts_input3 = gr.TextArea(label="写上你给他的设定", value="你叫亚托克斯,俗称剑魔,世界的终结者。")
            tts_submit = gr.Button("Generate", variant="primary")
            tts_output2 = gr.Audio(label="Output")
            tts_submit.click(infer, [tts_input1, tts_input3], [tts_output2])
app.launch()