Mahiruoshi committed on
Commit a7adc86
1 Parent(s): 99ded1c

Update app.py

Files changed (1)
  1. app.py +49 -19
app.py CHANGED
@@ -1,3 +1,4 @@
+import numpy as np
 import logging
 logging.getLogger('numba').setLevel(logging.WARNING)
 import IPython.display as ipd
@@ -71,30 +72,59 @@ def selection(speaker):
     elif speaker == "派蒙":
         spk = 16
     return spk
-
+def is_japanese(string):
+    for ch in string:
+        if ord(ch) > 0x3040 and ord(ch) < 0x30FF:
+            return True
+    return False
 def sle(language,tts_input0):
     if language == "中文":
         tts_input1 = "[ZH]" + tts_input0.replace('\n','。').replace(' ',',') + "[ZH]"
         return tts_input1
-    if language == "英文":
-        tts_input1 = "[EN]" + tts_input0.replace('\n','.').replace(' ',',') + "[EN]"
+    if language == "自动":
+        tts_input1 = f"[JA]{tts_input0}[JA]" if is_japanese(tts_input0) else f"[ZH]{tts_input0}[ZH]"
         return tts_input1
     elif language == "日文":
         tts_input1 = "[JA]" + tts_input0.replace('\n','。').replace(' ',',') + "[JA]"
         return tts_input1
+def extrac(text):
+    result_list = re.split(r'\n', text)
+    final_list = []
+    for i in result_list:
+        i = i.replace('\n','').replace(' ','')
+        if len(i)>0:
+            if len(i) > 20:
+                try:
+                    cur_list = re.split(r'。', i)
+                    for i in cur_list:
+                        if len(i)>0:
+                            final_list.append(i+'。')
+                except:
+                    pass
+            final_list.append(i)
+    final_list = [x for x in final_list if x != '']
+    print(final_list)
+    return final_list
 def infer(language,text,speaker_id, n_scale= 0.667,n_scale_w = 0.8, l_scale = 1 ):
     speaker_id = int(selection(speaker_id))
-    stn_tst = get_text(sle(language,text), hps_ms)
-    with torch.no_grad():
-        x_tst = stn_tst.unsqueeze(0).to(dev)
-        x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(dev)
-        sid = torch.LongTensor([speaker_id]).to(dev)
-        t1 = time.time()
-        audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=n_scale, noise_scale_w=n_scale_w, length_scale=l_scale)[0][0,0].data.cpu().float().numpy()
-        t2 = time.time()
-        spending_time = "推理时间:"+str(t2-t1)+"s"
-        print(spending_time)
-    return (hps_ms.data.sampling_rate, audio)
+    final_list = extrac(text)
+    audio_fin = []
+    c = 0
+    for sentence in final_list:
+        c +=1
+        try:
+            with torch.no_grad():
+                x_tst = stn_tst.unsqueeze(0).to(dev)
+                x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(dev)
+                sid = torch.LongTensor([speaker_id]).to(dev)
+                t1 = time.time()
+                audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=n_scale, noise_scale_w=n_scale_w, length_scale=l_scale)[0][0,0].data.cpu().float().numpy()
+                t2 = time.time()
+                spending_time = "第"+str(c)+"句的推理时间为:"+str(t2-t1)+"s"
+                print(spending_time)
+        except:
+            print('存在非法字符')
+    return (hps_ms.data.sampling_rate, np.concatenate(audio_fin))
 lan = ["中文","日文","英文"]
 idols = ["高咲侑","歩夢","かすみ","しずく","果林","愛","せつ菜","璃奈","栞子","エマ","ランジュ","ミア","派蒙"]
 dev = torch.device("cpu")
@@ -106,17 +136,17 @@ net_g_ms = ONNXVITS_infer.SynthesizerTrn(
     n_speakers=hps_ms.data.n_speakers,
     **hps_ms.model)
 _ = net_g_ms.eval()
-_ = utils.load_checkpoint("lovelive/G_330000.pth", net_g_ms)
+_ = utils.load_checkpoint("lovelive/G_525000.pth", net_g_ms)
 app = gr.Blocks()
 with app:
     with gr.Tabs():
 
-        with gr.TabItem("New model"):
+        with gr.TabItem("虹团vits模型,现可按句分割实现长文本合成"):
 
-            tts_input1 = gr.TextArea(label="新模型,老版本在lovelive文件夹中", value="为什么你会这么熟练啊!你和雪菜亲过多少次了啊!?你到底要把我甩开多远你才甘心啊!?")
-            language = gr.Dropdown(label="选择语言",choices=lan, value="中文", interactive=True)
+            tts_input1 = gr.TextArea(label="去标贝新模型,老版本在lovelive文件夹中", value="你一点也不在意我吗?一点也不会吗?完全不会?我对你来说不重要吗?只是朋友吗?普通的朋友吗?我希望自己不是普通的朋友,就算比普通好一点也好,我想成为不普通的……朋友……嗳,岛村,我该怎么做才好?嗳。岛村,你有在听吗?求你听我说话。你听到我的声音有什么想法吗?会有吗?你要说会感到放心,还是什么都好,拜托有点想法。我希望你可以有点想法,还是说我不该期待这种事?岛村!我就是要岛村啊,我啊,就是想要跟岛村待在一起。我不需要岛村以外的人,不需要……只要有岛村就好。我没有很任性喔,我只说比普通好一点,好一点而已啊。其他人根本就不重要,也不需要,我希望那些人都可以滚远一点,可是你为什么要去他们那边呢?求你来我这边,来我这边,待在我身边,不要离开我。不行,在岛村身旁的只能是我,我希望是我,我想待在你身边,拜托你让我待在你身边……那个女生是谁?我不认识她啊。我不想看到你变成我不认识的岛村,我想了解岛村的一切,也讨厌有我不想知道的事情存在,可是我更讨厌自己不知道,会更难受。会很难受,很痛苦,很痛苦……岛村……我想问你要不要一起出去玩,也想去祭典啊,我很想去啊,可是岛村为什么会跟那个女生一起去?为什么跟她一起出去玩?岛村你现在在哪里?有跟谁在一起吗?岛村,岛村……嗳,你有在听吗?")
+            language = gr.Dropdown(label="选择语言,目前勉强可以做到自动识别",choices=lan, value="自动", interactive=True)
             para_input1 = gr.Slider(minimum= 0.01,maximum=1.0,label="更改噪声比例", value=0.667)
-            para_input2 = gr.Slider(minimum= 0.01,maximum=1.0,label="更改噪声偏差", value=0.8)
+            para_input2 = gr.Slider(minimum= 0.01,maximum=1.0,label="更改噪声偏差,调小可以缩短间隔", value=0.8)
             para_input3 = gr.Slider(minimum= 0.1,maximum=10,label="更改时间比例", value=1)
             tts_submit = gr.Button("Generate", variant="primary")
             speaker1 = gr.Dropdown(label="选择说话人",choices=idols, value="歩夢", interactive=True)
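
Note on the new per-sentence loop in infer(): in the hunk shown above it still references stn_tst without computing it for each sentence, and nothing is appended to audio_fin before np.concatenate(audio_fin) is called. The sketch below shows one way the sentence-by-sentence synthesis presumably is meant to work; it is an illustrative reconstruction, not code from this commit, and it assumes the app's existing helpers (extrac, sle, get_text, net_g_ms, hps_ms, dev) are in scope and that speaker_id is already the integer id returned by selection(). The helper name infer_by_sentence is hypothetical.

import time

import numpy as np
import torch

def infer_by_sentence(language, text, speaker_id, n_scale=0.667, n_scale_w=0.8, l_scale=1):
    # Split the input into sentences and synthesize them one by one,
    # then join the clips into a single waveform.
    audio_fin = []
    for c, sentence in enumerate(extrac(text), start=1):
        # Build the model input for this sentence only (not present in the committed loop).
        stn_tst = get_text(sle(language, sentence), hps_ms)
        with torch.no_grad():
            x_tst = stn_tst.unsqueeze(0).to(dev)
            x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(dev)
            sid = torch.LongTensor([speaker_id]).to(dev)
            t1 = time.time()
            audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid,
                                   noise_scale=n_scale, noise_scale_w=n_scale_w,
                                   length_scale=l_scale)[0][0, 0].data.cpu().float().numpy()
            print(f"Sentence {c} took {time.time() - t1:.2f}s")
        # Collect every clip so np.concatenate has something to join (also not present above).
        audio_fin.append(audio)
    return hps_ms.data.sampling_rate, np.concatenate(audio_fin)

Concatenating the per-sentence clips yields one continuous waveform at hps_ms.data.sampling_rate, which is the tuple format the Gradio audio output expects.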