darksakura committed on
Commit
ff67781
·
1 Parent(s): 6772e20

Upload 2 files

Browse files
Files changed (2) hide show
  1. N.zip +2 -2
  2. app.py +14 -9
N.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b56e4467e1353d6475a17e6b4d67631103cd68df61108ddbe98ebb4962fbc1ec
3
- size 447896729
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2ffdb4f06600470610cbaaf7e3aa9b75c32005707f42c545504b6a1d1588cfa
3
+ size 298535552
app.py CHANGED
@@ -49,9 +49,8 @@ def tts_fn(_text, _gender, _lang, _rate, _volume, sid, vc_transform, auto_f0,clu
49
  soundfile.write(input_audio, audio, 44100, format="wav")
50
 
51
  output_file_path = "tts_output.mp3"
52
- print (_text, _gender, _lang, _rate, _volume, sid, vc_transform, auto_f0,cluster_ratio, slice_db, f0_predictor)
53
  _audio = model.slice_inference(input_audio, sid, vc_transform, slice_db, cluster_ratio, auto_f0, 0.4,f0_predictor=f0_predictor,clip_seconds=40)
54
-
55
  soundfile.write("tts_output.mp3", _audio, 44100, format="mp3")
56
  return "Success", output_file_path
57
 
@@ -136,7 +135,7 @@ def loadmodel(model_):
136
  spks = list(model.spk2id.keys())
137
  print(model_, "configs/" + model_name + ".json", "./kmeans/" + model_name + ".pt")
138
 
139
- return "success",update_dropdown(spks)
140
 
141
  def update_dropdown(new_choices):
142
  global model
@@ -170,7 +169,7 @@ with app:
170
  #f'<img style="width:auto;height:300px;" src="cover.png">'
171
  #'</div>')
172
 
173
- gr.Markdown(value=base64.b64decode( b'ICAgICAgICAgICAgICAgICAgICAjIOWJjeiogAogICAgICAgICAgICAgICAgICAgICog5Zu95YaF6K6/6Zeu6L6D5oWi77yM5bu66K6u5L2/55So5Luj55CGLuabtOaWsOS6jjIzLTExLTA244CC5LuF5L6b5Liq5Lq65aix5LmQ5ZKM6Z2e5ZWG5Lia55So6YCU77yM56aB5q2i55So5LqO6KGA6IWl44CB5pq05Yqb44CB5oCn55u45YWz44CB5pS/5rK755u45YWz5YaF5a65CiAgICAgICAgICAgICAgICAgICAgKiDkuYPmnKjlnYI3NeS9jVRUU++8mltodHRwczovL3ZpdHMubm9naXpha2E0Ni5jY10oaHR0cHM6Ly92aXRzLm5vZ2l6YWthNDYuY2MpIAogICAgICAgICAgICAgICAgICAgICog5qyi6L+O5Yqg5YWl6K6o6K66VEfnvqQ6W2h0dHBzOi8vdC5tZS8rdlA4TksxTk1MaVl6TURKbF0oaHR0cHM6Ly90Lm1lLyt2UDhOSzFOTUxpWXpNREpsKSDnvqTph4zmnIlCb3TkuIDmraXliLbkvZzvvIzmlrnkvr/lnKjnp7vliqjorr7lpIfliLbkvZzjgIIKICAgICAgICAgICAgICAgICAgICAjIOWjsOaYjgogICAgICAgICAgICAgICAgICAgICog5Lu75L2V5Y+R5biD5Yiw572R57uc5bmz5Y+w55qE5Z+65LqOIHNvdml0cyDliLbkvZznmoTpn7PpopHvvIzpg73lv4XpobvopoHlnKjnroDku4vmmI7noa7mjIfmmI7nlKjkuo7nmoTovpPlhaXmupDmiJbpn7PkuZDpk77mjqUKICAgICAgICAgICAgICAgICAgICAqIOWmgueUqOatpOaooeWei+WItuS9nOmfs+mikeivt+agh+azqOacrOWcqOe6v+i9rOaNouWcsOWdgO+8mmh0dHBzOi8vc292aXRzNC5ub2dpemFrYTQ2LmNj').decode())
174
 
175
  with gr.Tabs():
176
  with gr.TabItem("单个音频上传"):
@@ -188,20 +187,26 @@ with app:
188
  tts_rate = gr.Slider(label = "TTS语音变速(倍速相对值)", minimum = -1, maximum = 3, value = 0, step = 0.1)
189
  tts_volume = gr.Slider(label = "TTS语音音量(相对值)", minimum = -1, maximum = 1.5, value = 0, step = 0.1)
190
  vc_tts_submit = gr.Button("文本转语音", variant="primary")
 
 
 
 
 
191
  spks = list(model.spk2id.keys())
192
 
193
- sid = gr.Dropdown(label="音色", choices=spks, value="HOSHINO_MINAMI")
194
  sid.change(fn=update_dropdown,inputs=[sid],outputs=[sid])
195
  sid.update(interactive=True)
196
- with gr.Accordion(label="↓切换模型(默认58位成员v1,音色具有抽奖性质,可切换尝试。也有特化的个人模型可选择)", open=False):
 
197
  modelstrs = gr.Dropdown(label = "模型", choices = modelPaths, value = modelPaths[0], type = "value")
198
  btnMod = gr.Button("载入模型")
199
- statusa = gr.TextArea()
200
- btnMod.click(loadmodel, inputs=[modelstrs], outputs = [statusa,sid])
201
  with gr.Row():
202
  slice_db = gr.Slider(label="切片阈值(较嘈杂时-30,保留呼吸声时-50)",maximum=-30, minimum=-70, step=1, value=-40)
203
  vc_transform = gr.Slider(label="变调(整数,可以正负,半音数量,升高八度就是12)",maximum=16, minimum=-16, step=1, value=0)
204
- f0_predictor = gr.Radio(label="f0预测器(如遇哑音可以尝试更换f0)凭干声干净程度选择。推荐fcpermvpe", choices=["pm","dio","harvest","fcpe","rmvpe"], value="fcpe")
205
  with gr.Row():
206
  cluster_ratio = gr.Slider(label="聚类模型混合比例,0-1之间,默认为0不启用聚类,能提升音色相似度,但会导致咬字下降(如果使用建议0.5左右)",maximum=1, minimum=0, step=0.1, value=0)
207
  output_format = gr.Radio(label="音频输出格式(MP3会导致时间轴多27ms,需合成请选flac)", choices=["flac", "mp3"], value = "mp3")#格式
 
49
  soundfile.write(input_audio, audio, 44100, format="wav")
50
 
51
  output_file_path = "tts_output.mp3"
 
52
  _audio = model.slice_inference(input_audio, sid, vc_transform, slice_db, cluster_ratio, auto_f0, 0.4,f0_predictor=f0_predictor,clip_seconds=40)
53
+ print (_text, _gender, _lang, _rate, _volume, sid, vc_transform, auto_f0,cluster_ratio, slice_db, f0_predictor)
54
  soundfile.write("tts_output.mp3", _audio, 44100, format="mp3")
55
  return "Success", output_file_path
56
 
 
135
  spks = list(model.spk2id.keys())
136
  print(model_, "configs/" + model_name + ".json", "./kmeans/" + model_name + ".pt")
137
 
138
+ return update_dropdown(spks)
139
 
140
  def update_dropdown(new_choices):
141
  global model
 
169
  #f'<img style="width:auto;height:300px;" src="cover.png">'
170
  #'</div>')
171
 
172
+ gr.Markdown(value=base64.b64decode( b'ICAgICAgICAgICAgICAgICAgICAjIOWJjeiogAogICAgICAgICAgICAgICAgICAgICog5Zu95YaF6K6/6Zeu6L6D5oWi77yM5bu66K6u5L2/55So5Luj55CGLuabtOaWsOS6jjIzLTExLTI3LOS6lOacn+W3sumHjeWBmu+8jOaYr+WboOS4uuS5g+acqFNUQVLoqpXnlJ/ntKDmnZDkv67pn7Pov4fph43vvIzpg6jliIbmiJDlkZjpnIDopoHljYfosIPjgILku4XkvpvkuKrkurrlqLHkuZDlkozpnZ7llYbkuJrnlKjpgJTvvIznpoHmraLnlKjkuo7ooYDohaXjgIHmmrTlipvjgIHmgKfnm7jlhbPjgIHmlL/msrvnm7jlhbPlhoXlrrkKICAgICAgICAgICAgICAgICAgICAjIOWjsOaYjgogICAgICAgICAgICAgICAgICAgICog5Lu75L2V5Y+R5biD5Yiw572R57uc5bmz5Y+w55qE5Z+65LqOIHNvdml0cyDliLbkvZznmoTpn7PpopHvvIzpg73lv4XpobvopoHlnKjnroDku4vmmI7noa7mjIfmmI7nlKjkuo7nmoTovpPlhaXmupDmiJbpn7PkuZDpk77mjqUKICAgICAgICAgICAgICAgICAgICAqIOWmgueUqOatpOaooeWei+WItuS9nOmfs+mikeivt+agh+azqOacrOWcqOe6v+i9rOaNouWcsOWdgO+8mmh0dHBzOi8vc292aXRzNC5ub2dpemFrYTQ2LmNj').decode())
173
 
174
  with gr.Tabs():
175
  with gr.TabItem("单个音频上传"):
 
187
  tts_rate = gr.Slider(label = "TTS语音变速(倍速相对值)", minimum = -1, maximum = 3, value = 0, step = 0.1)
188
  tts_volume = gr.Slider(label = "TTS语音音量(相对值)", minimum = -1, maximum = 1.5, value = 0, step = 0.1)
189
  vc_tts_submit = gr.Button("文本转语音", variant="primary")
190
+ with gr.TabItem("使用方法说明:"):
191
+
192
+ gr.Markdown(value=base64.b64decode( b'5oeS5Lq65pa55rOVOgrliqDlhaXorqjorrpUR+e+pDpbaHR0cHM6Ly90Lm1lLyt2UDhOSzFOTUxpWXpNREpsXShodHRwczovL3QubWUvK3ZQOE5LMU5NTGlZek1ESmwpICDnvqTph4zmnIlCb3Tlhajoh6rliqjliLbkvZxBSee/u+WUse+8jOS9v+eUqCBVVlItTURYLU5FVCBNYWluKzVfSFBfS2FyYW9rZS1VVlLvvIzkuI3ov4flhajoh6rliqjljJbvvIzotKjph4/oh6rnhLbkuI3lpoLkurrlt6XliLbkvZznmoTjgIIKCuaJi+WKqOaWueazle+8mgrmnIDmlrDov57mi5vvvIjlubLlo7DvvInvvJoKMS4gLuWIhuemu+S6uuWjsOOAkOWPr+ebtOaOpemAiTPmiJY077yM5L2G5Y+v6IO95o2f5Lyk5Lq65aOw44CR77yaCiDCoCDCoFVWUi1NRFgyM0MtSW5zdFZvYyBIUSAo6ZyA5YaN6LeRM19IUC1Wb2NhbC1VVlIpCiDCoCDCoERlbXVjc1YzIOaIliBWNCAgKOmcgOWGjei3kTNfSFAtVm9jYWwtVVZSKQogwqAgwqAzX0hQLVZvY2FsLVVWUgogwqAgwqBVVlItTURYLU5FVCBNYWluCgoyLiDlpoLmnpzmnInliJnljrvlkozlo7DjgJAz6YCJMe+8jOWTquS4quaViOaenOWlveWwseeUqOWTquS4quOAke+8mgogwqAgwqA1X0hQX0thcmFva2UtVVZSCiDCoCDCoFVWUi1CVkUtNEJfU04tNDQxMDAtMemAiUluc3RydW1lbnRhbCBPbmx5wqAgwqAKIMKgIMKgNl9IUF9LYXJhb2tlLVVWUgogwqAgwqBVVlItTURYLU5FVCBLYXJhb2tlKOi9u+W6puWOu+mZpCzpnIDlpJrmrKEpCgozLiDlpoLmnpzmnInliJnljrvmt7flk43jgJAy6YCJMe+8jOagueaNrua3t+WTjeeahOeoi+W6pumAieaLqeOAke+8mgogwqAgwqBVVlItRGUtRWNoby1Ob3JtYWzpgIlObyBFY2hvIE9ubHnvvIjovbvluqbmt7flk43vvIkKIMKgIMKgVVZSLURlLUVjaG8tQWdncmVzc2l2ZemAiU5vIEVjaG8gT25see+8iOmHjeW6pua3t+WTje+8iQoK5LiN5o6o6I2Q55So5LuY6LS56L2v5Lu244CCdXZyNeWMheWQq+W4gumdouS4iuacgOW8uuW8gOa6kOaooeWei++8jOi9r+S7tuWujOWFqOWFjei0ue+8ge+8ge+8ge+8ge+8mmh0dHBzOi8vdWx0aW1hdGV2b2NhbHJlbW92ZXIuY29tLwrnoa7kv51VVlLmmK/mnIDmlrDniYjmnKzvvJo1LjYuMC7lpoLmnpxVVlLph4zpnaLmsqHkuIrov7DmqKHlnovvvIzngrnlsI/mibPmiYvvvIzljrtEb3dubG9hZCBDZW50ZXLph4zpnaLkuIvovb3mqKHlnovvvIjor7foh6rlpIfmoq/lrZDvvIzlkKbliJnkvJrkuIvovb3lpLHotKXvvIk=').decode())
193
+
194
+
195
  spks = list(model.spk2id.keys())
196
 
197
+ sid = gr.Dropdown(label="音色(目前有58个)", choices=spks, value="HOSHINO_MINAMI")
198
  sid.change(fn=update_dropdown,inputs=[sid],outputs=[sid])
199
  sid.update(interactive=True)
200
+ with gr.Accordion(label="↓切换模型(默认58v1,音色具有抽奖性质,可尝试切换。也有特化的个人模型可选择)", open=False):
201
+ gr.Markdown(value="特殊说明:IKEDA_TERESA个人模型无聚类模型。44位成员的模型是9月份制作的,音质最好。")
202
  modelstrs = gr.Dropdown(label = "模型", choices = modelPaths, value = modelPaths[0], type = "value")
203
  btnMod = gr.Button("载入模型")
204
+
205
+ btnMod.click(loadmodel, inputs=[modelstrs], outputs = [sid])
206
  with gr.Row():
207
  slice_db = gr.Slider(label="切片阈值(较嘈杂时-30,保留呼吸声时-50)",maximum=-30, minimum=-70, step=1, value=-40)
208
  vc_transform = gr.Slider(label="变调(整数,可以正负,半音数量,升高八度就是12)",maximum=16, minimum=-16, step=1, value=0)
209
+ f0_predictor = gr.Radio(label="f0预测器(如遇哑音可以尝试更换f0)凭干声干净程度选择。只推荐fcpe音色最像或rmvpe音最准", choices=["pm","dio","harvest","fcpe","rmvpe"], value="fcpe")
210
  with gr.Row():
211
  cluster_ratio = gr.Slider(label="聚类模型混合比例,0-1之间,默认为0不启用聚类,能提升音色相似度,但会导致咬字下降(如果使用建议0.5左右)",maximum=1, minimum=0, step=0.1, value=0)
212
  output_format = gr.Radio(label="音频输出格式(MP3会导致时间轴多27ms,需合成请选flac)", choices=["flac", "mp3"], value = "mp3")#格式