import os
import re
import time

import torch
from scipy.io.wavfile import write

import utils
from models import SynthesizerTrn
from text import text_to_sequence
# - paths
path_to_config = "./config.json"  # path to the model's config .json
path_to_model = "./best.pth"      # path to a generator checkpoint (G_xxxx.pth)
# - text input ("The importance of software education grows by the day,
#   yet students easily get bored with software-related classes.")
input_text = "소프트웨어 교육의 중요성이 날로 더해가는데 학생들은 소프트웨어 관련 교육을 쉽게 지루해해요."
# check device
device = "cuda:0" if torch.cuda.is_available() else "cpu"
_pad = '_'
_punctuation = ';:,.!?¡¿—…"«»“” '
_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzㄱㄴㄷㄹㅁㅂㅅㅇㅈㅊㅋㅌㅍㅎㄲㄸㅃㅆㅉㅏㅓㅗㅜㅡㅣㅐㅔ'
_letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"
symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa)  # must match the symbol set used at training time
SPACE_ID = symbols.index(" ")
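# Note: SPACE_ID is unused below; get_text() interleaves id 0 (the pad
# symbol '_'), not the space symbol, as the blank token.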
def intersperse(lst, item):
    # interleave `item` between (and around) every element of `lst`
    result = [item] * (len(lst) * 2 + 1)
    result[1::2] = lst
    return result
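# e.g. intersperse([5, 6, 7], 0) -> [0, 5, 0, 6, 0, 7, 0]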
def get_text(text, hps):
    # convert raw text to a sequence of symbol ids; interleave blank (id 0)
    # tokens when the model was trained with add_blank
    text_norm = text_to_sequence(text, hps.data.text_cleaners)
    if hps.data.add_blank:
        text_norm = intersperse(text_norm, 0)
    text_norm = torch.LongTensor(text_norm)
    return text_norm
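# e.g. get_text("안녕", hps) returns a 1-D LongTensor of symbol ids, with 0s
# interleaved when hps.data.add_blank is true (exact ids depend on the cleaners)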
def vcss(inputstr):  # single-speaker inference
    print('text:', inputstr)
    fltstr = re.sub(r"[\[\]\(\)\{\}]", "", inputstr)  # strip bracket characters
    stn_tst = get_text(fltstr, hps)
    speed = 1
    output_dir = 'output'
    os.makedirs(output_dir, exist_ok=True)
    start_time = time.time()
    with torch.no_grad():
        x_tst = stn_tst.to(device).unsqueeze(0)
        x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(device)
        # noise_scale / noise_scale_w control prior and duration-predictor
        # variability; length_scale = 1 / speed stretches phoneme durations
        audio = net_g.infer(x_tst, x_tst_lengths, noise_scale=.667, noise_scale_w=0.8,
                            length_scale=1 / speed)[0][0, 0].data.cpu().float().numpy()
    write(f'./{output_dir}/tts_output.wav', hps.data.sampling_rate, audio)
    print(f'./{output_dir}/tts_output.wav generated!')
    end_time = time.time()
    run_time = end_time - start_time
    print("RunTime: {:.3f} sec".format(run_time))
def vcms(inputstr, sid):  # multi-speaker inference
    fltstr = re.sub(r"[\[\]\(\)\{\}]", "", inputstr)  # strip bracket characters
    # fltstr = langdetector(fltstr)  # - optional for cjke/cjks type cleaners
    stn_tst = get_text(fltstr, hps)
    speed = 1
    output_dir = 'output'
    os.makedirs(output_dir, exist_ok=True)
    start_time = time.time()
    with torch.no_grad():
        x_tst = stn_tst.to(device).unsqueeze(0)
        x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(device)
        sid = torch.LongTensor([sid]).to(device)  # speaker-embedding index
        audio = net_g.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8,
                            length_scale=1 / speed)[0][0, 0].data.cpu().float().numpy()
    write(f'./{output_dir}/output.wav', hps.data.sampling_rate, audio)
    print(f'./{output_dir}/output.wav generated!')
    end_time = time.time()
    run_time = end_time - start_time
    print("RunTime: {:.3f} sec".format(run_time))
hps = utils.get_hparams_from_file(path_to_config)
if "use_mel_posterior_encoder" in hps.model.keys() and hps.model.use_mel_posterior_encoder:
    print("Using mel posterior encoder for VITS2")
    posterior_channels = 80  # vits2
    hps.data.use_mel_posterior_encoder = True
else:
    print("Using lin posterior encoder for VITS1")
    posterior_channels = hps.data.filter_length // 2 + 1
    hps.data.use_mel_posterior_encoder = False
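# e.g. with a typical filter_length of 1024, VITS1 gets 1024 // 2 + 1 = 513
# linear-spectrogram channels, versus a fixed 80 mel channels for VITS2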
net_g = SynthesizerTrn(
    len(symbols),
    posterior_channels,
    hps.train.segment_size // hps.data.hop_length,
    n_speakers=hps.data.n_speakers,  # - >0 for multi-speaker
    **hps.model).to(device)
_ = net_g.eval()
_ = utils.load_checkpoint(path_to_model, net_g, None)
vcss(input_text)
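# For a multi-speaker checkpoint (hps.data.n_speakers > 0 in config.json),
# call vcms instead, e.g. vcms(input_text, sid=0)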