Mahiruoshi committed
Commit a7adc86 • 1 Parent(s): 99ded1c
Update app.py

app.py CHANGED
@@ -1,3 +1,4 @@
+import numpy as np
 import logging
 logging.getLogger('numba').setLevel(logging.WARNING)
 import IPython.display as ipd
@@ -71,30 +72,59 @@ def selection(speaker):
     elif speaker == "派蒙":
         spk = 16
     return spk
-
+def is_japanese(string):
+    for ch in string:
+        if ord(ch) > 0x3040 and ord(ch) < 0x30FF:
+            return True
+    return False
 def sle(language,tts_input0):
     if language == "中文":
         tts_input1 = "[ZH]" + tts_input0.replace('\n','。').replace(' ',',') + "[ZH]"
         return tts_input1
-    if language == "
-        tts_input1 = "[
+    if language == "自动":
+        tts_input1 = f"[JA]{tts_input0}[JA]" if is_japanese(tts_input0) else f"[ZH]{tts_input0}[ZH]"
         return tts_input1
     elif language == "日文":
         tts_input1 = "[JA]" + tts_input0.replace('\n','。').replace(' ',',') + "[JA]"
         return tts_input1
+def extrac(text):
+    result_list = re.split(r'\n', text)
+    final_list = []
+    for i in result_list:
+        i = i.replace('\n','').replace(' ','')
+        if len(i)>0:
+            if len(i) > 20:
+                try:
+                    cur_list = re.split(r'。', i)
+                    for i in cur_list:
+                        if len(i)>0:
+                            final_list.append(i+'。')
+                except:
+                    pass
+            final_list.append(i)
+    final_list = [x for x in final_list if x != '']
+    print(final_list)
+    return final_list
 def infer(language,text,speaker_id, n_scale= 0.667,n_scale_w = 0.8, l_scale = 1 ):
     speaker_id = int(selection(speaker_id))
-
-
-
-
-
-
-
-
-
-
-
+    final_list = extrac(text)
+    audio_fin = []
+    c = 0
+    for sentence in final_list:
+        c +=1
+        try:
+            with torch.no_grad():
+                x_tst = stn_tst.unsqueeze(0).to(dev)
+                x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(dev)
+                sid = torch.LongTensor([speaker_id]).to(dev)
+                t1 = time.time()
+                audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=n_scale, noise_scale_w=n_scale_w, length_scale=l_scale)[0][0,0].data.cpu().float().numpy()
+                t2 = time.time()
+                spending_time = "第"+str(c)+"句的推理时间为:"+str(t2-t1)+"s"
+                print(spending_time)
+        except:
+            print('存在非法字符')
+    return (hps_ms.data.sampling_rate, np.concatenate(audio_fin))
 lan = ["中文","日文","英文"]
 idols = ["高咲侑","歩夢","かすみ","しずく","果林","愛","せつ菜","璃奈","栞子","エマ","ランジュ","ミア","派蒙"]
 dev = torch.device("cpu")
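Two things in the new infer() body are worth flagging when reading this hunk. The loop walks final_list sentence by sentence, but in the lines added here stn_tst is used without being built from the current sentence, and nothing is ever appended to audio_fin, so as the hunk stands the closing np.concatenate(audio_fin) would receive an empty list. The text front-end that turns a tagged sentence into a tensor of symbol IDs is not part of this diff; the sketch below assumes a get_text(text, hps) helper in the usual VITS style, assumes the sentence is tagged via sle(), and leans on the module-level objects already defined in app.py (net_g_ms, hps_ms, dev, selection, sle, extrac). It shows one plausible way the loop is meant to fit together, not the file's actual implementation.

# Sketch only: per-sentence synthesis loop. get_text() is an assumed helper
# (tagged text -> LongTensor of symbol IDs); net_g_ms, hps_ms, dev, selection,
# sle and extrac are the module-level names from app.py.
import time
import numpy as np
import torch

def infer_sketch(language, text, speaker_id, n_scale=0.667, n_scale_w=0.8, l_scale=1):
    speaker_id = int(selection(speaker_id))
    audio_fin = []
    for c, sentence in enumerate(extrac(text), start=1):
        tagged = sle(language, sentence)      # wraps the sentence in [ZH]/[JA] tags; "自动" decides via is_japanese
        stn_tst = get_text(tagged, hps_ms)    # assumed helper, not shown in this diff
        with torch.no_grad():
            x_tst = stn_tst.unsqueeze(0).to(dev)
            x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(dev)
            sid = torch.LongTensor([speaker_id]).to(dev)
            t1 = time.time()
            audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid,
                                   noise_scale=n_scale, noise_scale_w=n_scale_w,
                                   length_scale=l_scale)[0][0, 0].data.cpu().float().numpy()
            print("sentence %d took %.2fs" % (c, time.time() - t1))
        audio_fin.append(audio)               # without this append, np.concatenate() gets an empty list
    return hps_ms.data.sampling_rate, np.concatenate(audio_fin)

Two smaller caveats in the committed code itself: is_japanese() only checks the hiragana/katakana range (0x3040–0x30FF), so Japanese input written purely in kanji is tagged as [ZH] when the language is set to 自动 ("auto"); and the bare except around the whole synthesis step reports every failure as 存在非法字符 ("illegal character present"), which can hide unrelated errors.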
@@ -106,17 +136,17 @@ net_g_ms = ONNXVITS_infer.SynthesizerTrn(
     n_speakers=hps_ms.data.n_speakers,
     **hps_ms.model)
 _ = net_g_ms.eval()
-_ = utils.load_checkpoint("lovelive/
+_ = utils.load_checkpoint("lovelive/G_525000.pth", net_g_ms)
 app = gr.Blocks()
 with app:
     with gr.Tabs():
 
-        with gr.TabItem("
+        with gr.TabItem("虹团vits模型,现可按句分割实现长文本合成"):
 
-            tts_input1 = gr.TextArea(label="
-            language = gr.Dropdown(label="
+            tts_input1 = gr.TextArea(label="去标贝新模型,老版本在lovelive文件夹中", value="你一点也不在意我吗?一点也不会吗?完全不会?我对你来说不重要吗?只是朋友吗?普通的朋友吗?我希望自己不是普通的朋友,就算比普通好一点也好,我想成为不普通的……朋友……嗳,岛村,我该怎么做才好?嗳。岛村,你有在听吗?求你听我说话。你听到我的声音有什么想法吗?会有吗?你要说会感到放心,还是什么都好,拜托有点想法。我希望你可以有点想法,还是说我不该期待这种事?岛村!我就是要岛村啊,我啊,就是想要跟岛村待在一起。我不需要岛村以外的人,不需要……只要有岛村就好。我没有很任性喔,我只说比普通好一点,好一点而已啊。其他人根本就不重要,也不需要,我希望那些人都可以滚远一点,可是你为什么要去他们那边呢?求你来我这边,来我这边,待在我身边,不要离开我。不行,在岛村身旁的只能是我,我希望是我,我想待在你身边,拜托你让我待在你身边……那个女生是谁?我不认识她啊。我不想看到你变成我不认识的岛村,我想了解岛村的一切,也讨厌有我不想知道的事情存在,可是我更讨厌自己不知道,会更难受。会很难受,很痛苦,很痛苦……岛村……我想问你要不要一起出去玩,也想去祭典啊,我很想去啊,可是岛村为什么会跟那个女生一起去?为什么跟她一起出去玩?岛村你现在在哪里?有跟谁在一起吗?岛村,岛村……嗳,你有在听吗?")
+            language = gr.Dropdown(label="选择语言,目前勉强可以做到自动识别",choices=lan, value="自动", interactive=True)
             para_input1 = gr.Slider(minimum= 0.01,maximum=1.0,label="更改噪声比例", value=0.667)
-            para_input2 = gr.Slider(minimum= 0.01,maximum=1.0,label="
+            para_input2 = gr.Slider(minimum= 0.01,maximum=1.0,label="更改噪声偏差,调小可以缩短间隔", value=0.8)
             para_input3 = gr.Slider(minimum= 0.1,maximum=10,label="更改时间比例", value=1)
             tts_submit = gr.Button("Generate", variant="primary")
             speaker1 = gr.Dropdown(label="选择说话人",choices=idols, value="歩夢", interactive=True)
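Outside the web UI, the (sampling_rate, waveform) pair that infer returns can be auditioned or saved directly. app.py already imports IPython.display as ipd; the scipy import below is an extra dependency used only for illustration, and the call assumes the loop has been completed along the lines of the earlier sketch.

# Consuming infer()'s return value outside Gradio (illustration only).
sr, wav = infer("自动", "你好。こんにちは。", "歩夢")   # language, text, speaker

# Listen in a notebook via the IPython.display import already present in app.py.
ipd.display(ipd.Audio(wav, rate=sr))

# Or write a 32-bit float WAV to disk; scipy is not imported by app.py itself.
from scipy.io import wavfile
wavfile.write("output.wav", sr, wav)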