Spaces:

candlend
/

vits-hoshimi

Runtime error

File size: 3,255 Bytes

44c1ee0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bfaaeb5
 
44c1ee0
f23d0b3
 
 
44c1ee0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bfaaeb5
44c1ee0
bfaaeb5
44c1ee0
 
 
 
 
 
 
 
 
 
 
 
f23d0b3
44c1ee0
bfaaeb5
44c1ee0
 
 
 
 
f23d0b3
 
fddd048
f23d0b3
fddd048
f23d0b3
 
44c1ee0

import os
import json
import math
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader

import commons
import utils
from models import SynthesizerTrn
from text.symbols import symbols
from text import text_to_sequence
import gradio as gr


# Checkpoint loaded at startup: the file name (directory part stripped) of
# whatever utils.latest_checkpoint_path returns for "G_*.pth" under "./".
# NOTE(review): presumably the most recent generator checkpoint - confirm in utils.
pth_path = os.path.basename(utils.latest_checkpoint_path("./", "G_*.pth"))
# Alternative kept for reference: pin one specific checkpoint instead.
# pth_path = "G_250000.pth"
# Hyperparameters read from the JSON config; the rest of the file reads
# hps.data, hps.train and hps.model from this object.
hps = utils.get_hparams_from_file("./configs/hoshimi_base.json")
# Alternative kept for reference: use the first CUDA device when available.
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# The model and all input tensors are placed on the CPU.
device = torch.device("cpu")
print(device)

def get_text(text, hps):
    """Convert ``text`` into a ``torch.LongTensor`` of symbol ids.

    The text is passed to ``text_to_sequence`` together with the cleaners
    listed in ``hps.data.text_cleaners``. When ``hps.data.add_blank`` is
    truthy, the value 0 is interspersed into the resulting sequence with
    ``commons.intersperse`` before it is wrapped in a tensor.

    Args:
        text: The input string to synthesize.
        hps: Hyperparameter object exposing ``data.text_cleaners`` and
            ``data.add_blank``.

    Returns:
        A ``torch.LongTensor`` built from the (optionally interspersed) ids.
    """
    sequence = text_to_sequence(text, hps.data.text_cleaners)
    ids = commons.intersperse(sequence, 0) if hps.data.add_blank else sequence
    return torch.LongTensor(ids)

def load_model(pth_path):
    """Build a ``SynthesizerTrn`` on ``device`` and load weights from ``pth_path``.

    The network is sized from the module-level ``symbols`` and ``hps``, moved
    to ``device``, switched to evaluation mode, and then populated from the
    checkpoint through ``utils.load_checkpoint``.

    Args:
        pth_path: Path of the checkpoint file handed to
            ``utils.load_checkpoint``.

    Returns:
        The constructed model in evaluation mode.
    """
    vocab_size = len(symbols)
    # presumably the number of spectrogram channels - TODO confirm in models.py
    second_dim = hps.data.filter_length // 2 + 1
    # presumably the training segment length in frames - TODO confirm in models.py
    segment_frames = hps.train.segment_size // hps.data.hop_length

    model = SynthesizerTrn(vocab_size, second_dim, segment_frames, **hps.model)
    model = model.to(device)
    model.eval()

    # The third argument is passed as None, exactly as before.
    # NOTE(review): presumably the optimizer slot - confirm in utils.
    utils.load_checkpoint(pth_path, model, None)
    return model


def list_model(directory=None):
    """Return the sorted names of selectable ``.pth`` files in a directory.

    A file is selectable when its name ends with ``.pth`` and does not start
    with ``D_``. The result is sorted because ``os.listdir`` returns entries
    in arbitrary order, which would otherwise make the dropdown order
    unstable between runs.

    Args:
        directory: Directory to scan. Defaults to the current working
            directory, which is what callers passing no argument get.

    Returns:
        A sorted list of matching file names (names only, no directory part).
    """
    if directory is None:
        directory = os.getcwd()
    return sorted(
        name
        for name in os.listdir(directory)
        if name.endswith(".pth") and not name.startswith("D_")
    )


def infer(text):
    """Synthesize audio for ``text`` with the currently loaded model.

    The text is converted to an id tensor with ``get_text``, given a leading
    dimension of size 1, and passed to ``net_g.infer`` together with its
    length. Gradients are disabled for the whole forward pass.

    Args:
        text: The input string to synthesize.

    Returns:
        A ``(sampling_rate, audio)`` tuple, where ``sampling_rate`` is
        ``hps.data.sampling_rate`` and ``audio`` is a float32 NumPy array
        taken from index ``[0][0, 0]`` of the model output.
    """
    stn_tst = get_text(text, hps)
    with torch.no_grad():
        x_tst = stn_tst.unsqueeze(0).to(device)
        x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(device)
        output = net_g.infer(
            x_tst,
            x_tst_lengths,
            noise_scale=.667,
            noise_scale_w=0.8,
            length_scale=1,
        )
        # Tensor.numpy() only works on CPU tensors. detach() replaces the
        # legacy .data accessor, and cpu() keeps this working if `device` is
        # ever switched to CUDA. Both are no-ops for a CPU tensor here.
        audio = output[0][0, 0].detach().cpu().float().numpy()
    return (hps.data.sampling_rate, audio)


# Checkpoint file names offered as choices in the model dropdown.
models = list_model()
# Model used by infer(); rebound by change_model() when another checkpoint
# is selected.
net_g = load_model(pth_path)

def change_model(model):
    """Switch the active model to the checkpoint named ``model``.

    Rebinds the module-level ``pth_path`` to ``model`` and ``net_g`` to the
    model returned by ``load_model`` for that checkpoint.

    Args:
        model: Checkpoint file name to load.

    Returns:
        The status message string naming the checkpoint.
    """
    global net_g, pth_path
    pth_path = model
    net_g = load_model(model)
    return "载入模型：" + model


# Build the Gradio UI: an HTML header, then a single "Basic" tab holding the
# model dropdown, the text input, the synthesize button, the audio output,
# a Markdown status line and a copyright notice.
app = gr.Blocks()
with app:
    # Read the header explicitly as UTF-8 so the result does not depend on
    # the platform's default encoding.
    with open("header.html", "r", encoding="utf-8") as f:
        gr.HTML(f.read())
    with gr.Tabs():
        with gr.TabItem("Basic"):
            choice_model = gr.Dropdown(
                choices=models, label="模型", value=pth_path)
            tts_input1 = gr.TextArea(
                label="请输入文本（目前只支持汉字和单个英文字母，建议使用常用符号和空格来改变语调和停顿）",
                value="这里是爱喝奶茶，穿得也像奶茶魅力点是普通话二乙的星弥吼西咪，晚上齁。")
            tts_submit = gr.Button("合成", variant="primary")
            tts_output = gr.Audio(label="Output")
            tts_model = gr.Markdown("")
            # Button click runs synthesis; the result goes to the audio widget.
            tts_submit.click(infer, [tts_input1], [tts_output])
            # Picking another checkpoint reloads the model and shows the
            # returned status message in the Markdown widget.
            choice_model.change(change_model, inputs=[
                                choice_model], outputs=[tts_model])
            gr.HTML('''
                <div style="text-align:right;font-size:12px;color:#4D4D4D">
                    <div class="font-medium">版权声明</div>
                    <div>本项目数据集和模型版权属于星弥Hoshimi</div>
                    <div>仅供学习交流，不可用于任何商业和非法用途，否则后果自负</div>
                </div>
            ''')
# Launch once the layout context has been closed, following the documented
# Blocks usage pattern.
app.launch()