# Inference script: synthesizes singing-voice wav files from label-list inputs.
# Standard library
import argparse
import json
import math
import os
import sys
import time

# Third-party
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np
import torch
from scipy.io.wavfile import write
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from tqdm import tqdm

# Project-local
import modules.commons as commons
import utils.utils as utils
from models import SynthesizerTrn
from text import npu
def parse_label(hps, pho, pitchid, dur, slur, gtdur):
    """Parse one label entry's whitespace-separated fields into tensors.

    Args:
        hps: hyper-parameter object; reads ``hps.data.hop_size`` and
            ``hps.data.sample_rate`` to convert ground-truth durations
            from seconds into frame counts.
        pho: space-separated phone symbols.
        pitchid: space-separated pitch ids (opencpop notation).
        dur: space-separated note durations (floats).
        slur: space-separated slur flags (ints).
        gtdur: space-separated ground-truth durations (floats, seconds).

    Returns:
        Tuple ``(phos, pitchs, durs, slurs, gtdurs)`` of 1-D tensors
        (Long, Long, Float, Long, Long), all with one element per phone.
    """
    # Tokenize each field once up front instead of re-splitting the full
    # string on every loop iteration (the original was quadratic).
    pho_tokens = pho.strip().split()
    pitch_tokens = pitchid.strip().split()
    dur_tokens = dur.strip().split()
    slur_tokens = slur.strip().split()
    gtdur_tokens = gtdur.strip().split()
    n = len(pho_tokens)

    phos = np.asarray(
        [npu.symbol_converter.ttsing_phone_to_int[t] for t in pho_tokens],
        dtype=np.int32)
    pitchs = np.asarray(
        [npu.symbol_converter.ttsing_opencpop_pitch_to_int[pitch_tokens[i]]
         for i in range(n)],
        dtype=np.int32)
    durs = np.asarray([float(dur_tokens[i]) for i in range(n)],
                      dtype=np.float32)
    slurs = np.asarray([int(slur_tokens[i]) for i in range(n)],
                       dtype=np.int32)
    gtdurs = np.asarray([float(gtdur_tokens[i]) for i in range(n)],
                        dtype=np.float32)
    # Seconds -> frame counts, rounding up so partial frames are covered.
    gtdurs = np.ceil(gtdurs / (hps.data.hop_size / hps.data.sample_rate))

    phos = torch.LongTensor(phos)
    pitchs = torch.LongTensor(pitchs)
    durs = torch.FloatTensor(durs)
    slurs = torch.LongTensor(slurs)
    gtdurs = torch.LongTensor(gtdurs)
    return phos, pitchs, durs, slurs, gtdurs
def load_model(model_dir):
    """Restore the newest checkpoint under *model_dir* with its config.

    Args:
        model_dir: directory holding checkpoints and a ``config.json``.

    Returns:
        Tuple ``(net_g, hps)``: the generator set to eval mode and the
        hyper-parameters parsed from ``config.json``.
    """
    ckpt_path = utils.latest_checkpoint_path(model_dir)
    cfg_path = os.path.join(model_dir, "config.json")
    hps = utils.get_hparams_from_file(cfg_path)
    print("Load model from : ", ckpt_path)
    print("config: ", cfg_path)
    generator = SynthesizerTrn(hps)
    generator.eval()
    utils.load_checkpoint(ckpt_path, generator, None)
    return generator, hps
def inference_label2wav(net_g, label_list_path, output_dir, hps, cuda_id=None):
    """Synthesize one wav file per entry of a label-list file.

    Each line of *label_list_path* must be pipe-separated as
    ``fileid|txt|phones|pitchid|dur|gtdur|slur``.  For each entry a
    ``<fileid>.wav`` is written into *output_dir*.

    Args:
        net_g: trained SynthesizerTrn generator.
        label_list_path: path to the pipe-separated label list.
        output_dir: existing directory receiving the wav files.
        hps: hyper-parameters (reads ``hps.data.sample_rate``).
        cuda_id: GPU index to run on, or None to stay on CPU.
    """
    id2label = {}
    with open(label_list_path, "r") as in_file:
        for line in in_file:
            fileid, txt, phones, pitchid, dur, gtdur, slur = line.split('|')
            id2label[fileid] = [phones, pitchid, dur, slur, gtdur]

    # BUG FIX: the original hard-coded .cuda(0) everywhere, silently
    # ignoring cuda_id.  Also move the model once, not per utterance.
    if cuda_id is not None:
        net_g = net_g.cuda(cuda_id)

    for file_name in tqdm(id2label.keys()):
        pho, pitchid, dur, slur, gtdur = id2label[file_name]
        pho, pitchid, dur, slur, gtdur = parse_label(hps, pho, pitchid,
                                                     dur, slur, gtdur)
        with torch.no_grad():
            # Add a batch dimension of 1 to every input.
            pho_lengths = torch.LongTensor([pho.size(0)])
            pho = pho.unsqueeze(0)
            pitchid = pitchid.unsqueeze(0)
            dur = dur.unsqueeze(0)
            slur = slur.unsqueeze(0)
            if cuda_id is not None:
                pho = pho.cuda(cuda_id)
                pho_lengths = pho_lengths.cuda(cuda_id)
                pitchid = pitchid.cuda(cuda_id)
                dur = dur.cuda(cuda_id)
                slur = slur.cuda(cuda_id)
            # infer: o[0, 0] picks the first batch item / first channel.
            o, _, _ = net_g.infer(pho, pho_lengths, pitchid, dur, slur)
            audio = o[0, 0].data.cpu().float().numpy()
            # Scale float audio to 16-bit PCM (hps.data.max_wav_value).
            audio = audio * 32768
            audio = audio.astype(np.int16)
        # save
        write(os.path.join(output_dir, file_name.split('.')[0] + '.wav'),
              hps.data.sample_rate, audio)
if __name__ == "__main__":
    # CLI entry point: load the model, then synthesize every label entry.
    parser = argparse.ArgumentParser()
    parser.add_argument('-model_dir', '--model_dir', type=str, required=True)
    parser.add_argument('-input_dir', '--input_dir', type=str, required=True)
    parser.add_argument('-output_dir', '--output_dir', type=str, required=True)
    args = parser.parse_args()

    model, hps = load_model(args.model_dir)
    if(not os.path.exists(args.output_dir)):
        os.makedirs(args.output_dir)
    print("load model end!")
    inference_label2wav(model, args.input_dir, args.output_dir, hps, cuda_id=0)