Spaces:
Running
Running
darksakura
committed on
Commit
·
ff67781
1
Parent(s):
6772e20
Upload 2 files
Browse files
N.zip
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2ffdb4f06600470610cbaaf7e3aa9b75c32005707f42c545504b6a1d1588cfa
|
3 |
+
size 298535552
|
app.py
CHANGED
@@ -49,9 +49,8 @@ def tts_fn(_text, _gender, _lang, _rate, _volume, sid, vc_transform, auto_f0,clu
|
|
49 |
soundfile.write(input_audio, audio, 44100, format="wav")
|
50 |
|
51 |
output_file_path = "tts_output.mp3"
|
52 |
-
print (_text, _gender, _lang, _rate, _volume, sid, vc_transform, auto_f0,cluster_ratio, slice_db, f0_predictor)
|
53 |
_audio = model.slice_inference(input_audio, sid, vc_transform, slice_db, cluster_ratio, auto_f0, 0.4,f0_predictor=f0_predictor,clip_seconds=40)
|
54 |
-
|
55 |
soundfile.write("tts_output.mp3", _audio, 44100, format="mp3")
|
56 |
return "Success", output_file_path
|
57 |
|
@@ -136,7 +135,7 @@ def loadmodel(model_):
|
|
136 |
spks = list(model.spk2id.keys())
|
137 |
print(model_, "configs/" + model_name + ".json", "./kmeans/" + model_name + ".pt")
|
138 |
|
139 |
-
return
|
140 |
|
141 |
def update_dropdown(new_choices):
|
142 |
global model
|
@@ -170,7 +169,7 @@ with app:
|
|
170 |
#f'<img style="width:auto;height:300px;" src="cover.png">'
|
171 |
#'</div>')
|
172 |
|
173 |
-
gr.Markdown(value=base64.b64decode( b'ICAgICAgICAgICAgICAgICAgICAjIOWJjeiogAogICAgICAgICAgICAgICAgICAgICog5Zu95YaF6K6/6Zeu6L6D5oWi77yM5bu66K6u5L2/
|
174 |
|
175 |
with gr.Tabs():
|
176 |
with gr.TabItem("单个音频上传"):
|
@@ -188,20 +187,26 @@ with app:
|
|
188 |
tts_rate = gr.Slider(label = "TTS语音变速(倍速相对值)", minimum = -1, maximum = 3, value = 0, step = 0.1)
|
189 |
tts_volume = gr.Slider(label = "TTS语音音量(相对值)", minimum = -1, maximum = 1.5, value = 0, step = 0.1)
|
190 |
vc_tts_submit = gr.Button("文本转语音", variant="primary")
|
|
|
|
|
|
|
|
|
|
|
191 |
spks = list(model.spk2id.keys())
|
192 |
|
193 |
-
sid = gr.Dropdown(label="音色", choices=spks, value="HOSHINO_MINAMI")
|
194 |
sid.change(fn=update_dropdown,inputs=[sid],outputs=[sid])
|
195 |
sid.update(interactive=True)
|
196 |
-
with gr.Accordion(label="↓切换模型(默认
|
|
|
197 |
modelstrs = gr.Dropdown(label = "模型", choices = modelPaths, value = modelPaths[0], type = "value")
|
198 |
btnMod = gr.Button("载入模型")
|
199 |
-
|
200 |
-
btnMod.click(loadmodel, inputs=[modelstrs], outputs = [
|
201 |
with gr.Row():
|
202 |
slice_db = gr.Slider(label="切片阈值(较嘈杂时-30,保留呼吸声时-50)",maximum=-30, minimum=-70, step=1, value=-40)
|
203 |
vc_transform = gr.Slider(label="变调(整数,可以正负,半音数量,升高八度就是12)",maximum=16, minimum=-16, step=1, value=0)
|
204 |
-
f0_predictor = gr.Radio(label="f0预测器(如遇哑音可以尝试更换f0)
|
205 |
with gr.Row():
|
206 |
cluster_ratio = gr.Slider(label="聚类模型混合比例,0-1之间,默认为0不启用聚类,能提升音色相似度,但会导致咬字下降(如果使用建议0.5左右)",maximum=1, minimum=0, step=0.1, value=0)
|
207 |
output_format = gr.Radio(label="音频输出格式(MP3会导致时间轴多27ms,需合成请选flac)", choices=["flac", "mp3"], value = "mp3")#格式
|
|
|
49 |
soundfile.write(input_audio, audio, 44100, format="wav")
|
50 |
|
51 |
output_file_path = "tts_output.mp3"
|
|
|
52 |
_audio = model.slice_inference(input_audio, sid, vc_transform, slice_db, cluster_ratio, auto_f0, 0.4,f0_predictor=f0_predictor,clip_seconds=40)
|
53 |
+
print (_text, _gender, _lang, _rate, _volume, sid, vc_transform, auto_f0,cluster_ratio, slice_db, f0_predictor)
|
54 |
soundfile.write("tts_output.mp3", _audio, 44100, format="mp3")
|
55 |
return "Success", output_file_path
|
56 |
|
|
|
135 |
spks = list(model.spk2id.keys())
|
136 |
print(model_, "configs/" + model_name + ".json", "./kmeans/" + model_name + ".pt")
|
137 |
|
138 |
+
return update_dropdown(spks)
|
139 |
|
140 |
def update_dropdown(new_choices):
|
141 |
global model
|
|
|
169 |
#f'<img style="width:auto;height:300px;" src="cover.png">'
|
170 |
#'</div>')
|
171 |
|
172 |
+
gr.Markdown(value=base64.b64decode( b'ICAgICAgICAgICAgICAgICAgICAjIOWJjeiogAogICAgICAgICAgICAgICAgICAgICog5Zu95YaF6K6/6Zeu6L6D5oWi77yM5bu66K6u5L2/55So5Luj55CGLuabtOaWsOS6jjIzLTExLTI3LOS6lOacn+W3sumHjeWBmu+8jOaYr+WboOS4uuS5g+acqFNUQVLoqpXnlJ/ntKDmnZDkv67pn7Pov4fph43vvIzpg6jliIbmiJDlkZjpnIDopoHljYfosIPjgILku4XkvpvkuKrkurrlqLHkuZDlkozpnZ7llYbkuJrnlKjpgJTvvIznpoHmraLnlKjkuo7ooYDohaXjgIHmmrTlipvjgIHmgKfnm7jlhbPjgIHmlL/msrvnm7jlhbPlhoXlrrkKICAgICAgICAgICAgICAgICAgICAjIOWjsOaYjgogICAgICAgICAgICAgICAgICAgICog5Lu75L2V5Y+R5biD5Yiw572R57uc5bmz5Y+w55qE5Z+65LqOIHNvdml0cyDliLbkvZznmoTpn7PpopHvvIzpg73lv4XpobvopoHlnKjnroDku4vmmI7noa7mjIfmmI7nlKjkuo7nmoTovpPlhaXmupDmiJbpn7PkuZDpk77mjqUKICAgICAgICAgICAgICAgICAgICAqIOWmgueUqOatpOaooeWei+WItuS9nOmfs+mikeivt+agh+azqOacrOWcqOe6v+i9rOaNouWcsOWdgO+8mmh0dHBzOi8vc292aXRzNC5ub2dpemFrYTQ2LmNj').decode())
|
173 |
|
174 |
with gr.Tabs():
|
175 |
with gr.TabItem("单个音频上传"):
|
|
|
187 |
tts_rate = gr.Slider(label = "TTS语音变速(倍速相对值)", minimum = -1, maximum = 3, value = 0, step = 0.1)
|
188 |
tts_volume = gr.Slider(label = "TTS语音音量(相对值)", minimum = -1, maximum = 1.5, value = 0, step = 0.1)
|
189 |
vc_tts_submit = gr.Button("文本转语音", variant="primary")
|
190 |
+
with gr.TabItem("使用方法说明:"):
|
191 |
+
|
192 |
+
gr.Markdown(value=base64.b64decode( b'5oeS5Lq65pa55rOVOgrliqDlhaXorqjorrpUR+e+pDpbaHR0cHM6Ly90Lm1lLyt2UDhOSzFOTUxpWXpNREpsXShodHRwczovL3QubWUvK3ZQOE5LMU5NTGlZek1ESmwpICDnvqTph4zmnIlCb3Tlhajoh6rliqjliLbkvZxBSee/u+WUse+8jOS9v+eUqCBVVlItTURYLU5FVCBNYWluKzVfSFBfS2FyYW9rZS1VVlLvvIzkuI3ov4flhajoh6rliqjljJbvvIzotKjph4/oh6rnhLbkuI3lpoLkurrlt6XliLbkvZznmoTjgIIKCuaJi+WKqOaWueazle+8mgrmnIDmlrDov57mi5vvvIjlubLlo7DvvInvvJoKMS4gLuWIhuemu+S6uuWjsOOAkOWPr+ebtOaOpemAiTPmiJY077yM5L2G5Y+v6IO95o2f5Lyk5Lq65aOw44CR77yaCiDCoCDCoFVWUi1NRFgyM0MtSW5zdFZvYyBIUSAo6ZyA5YaN6LeRM19IUC1Wb2NhbC1VVlIpCiDCoCDCoERlbXVjc1YzIOaIliBWNCAgKOmcgOWGjei3kTNfSFAtVm9jYWwtVVZSKQogwqAgwqAzX0hQLVZvY2FsLVVWUgogwqAgwqBVVlItTURYLU5FVCBNYWluCgoyLiDlpoLmnpzmnInliJnljrvlkozlo7DjgJAz6YCJMe+8jOWTquS4quaViOaenOWlveWwseeUqOWTquS4quOAke+8mgogwqAgwqA1X0hQX0thcmFva2UtVVZSCiDCoCDCoFVWUi1CVkUtNEJfU04tNDQxMDAtMemAiUluc3RydW1lbnRhbCBPbmx5wqAgwqAKIMKgIMKgNl9IUF9LYXJhb2tlLVVWUgogwqAgwqBVVlItTURYLU5FVCBLYXJhb2tlKOi9u+W6puWOu+mZpCzpnIDlpJrmrKEpCgozLiDlpoLmnpzmnInliJnljrvmt7flk43jgJAy6YCJMe+8jOagueaNrua3t+WTjeeahOeoi+W6pumAieaLqeOAke+8mgogwqAgwqBVVlItRGUtRWNoby1Ob3JtYWzpgIlObyBFY2hvIE9ubHnvvIjovbvluqbmt7flk43vvIkKIMKgIMKgVVZSLURlLUVjaG8tQWdncmVzc2l2ZemAiU5vIEVjaG8gT25see+8iOmHjeW6pua3t+WTje+8iQoK5LiN5o6o6I2Q55So5LuY6LS56L2v5Lu244CCdXZyNeWMheWQq+W4gumdouS4iuacgOW8uuW8gOa6kOaooeWei++8jOi9r+S7tuWujOWFqOWFjei0ue+8ge+8ge+8ge+8ge+8mmh0dHBzOi8vdWx0aW1hdGV2b2NhbHJlbW92ZXIuY29tLwrnoa7kv51VVlLmmK/mnIDmlrDniYjmnKzvvJo1LjYuMC7lpoLmnpxVVlLph4zpnaLmsqHkuIrov7DmqKHlnovvvIzngrnlsI/mibPmiYvvvIzljrtEb3dubG9hZCBDZW50ZXLph4zpnaLkuIvovb3mqKHlnovvvIjor7foh6rlpIfmoq/lrZDvvIzlkKbliJnkvJrkuIvovb3lpLHotKXvvIk=').decode())
|
193 |
+
|
194 |
+
|
195 |
spks = list(model.spk2id.keys())
|
196 |
|
197 |
+
sid = gr.Dropdown(label="音色(目前有58个)", choices=spks, value="HOSHINO_MINAMI")
|
198 |
sid.change(fn=update_dropdown,inputs=[sid],outputs=[sid])
|
199 |
sid.update(interactive=True)
|
200 |
+
with gr.Accordion(label="↓切换模型(默认58v1,音色具有抽奖性质,可尝试切换。也有特化的个人模型可选择)", open=False):
|
201 |
+
gr.Markdown(value="特殊说明:IKEDA_TERESA个人模型无聚类模型。44位成员的模型是9月份制作的,音质最好。")
|
202 |
modelstrs = gr.Dropdown(label = "模型", choices = modelPaths, value = modelPaths[0], type = "value")
|
203 |
btnMod = gr.Button("载入模型")
|
204 |
+
|
205 |
+
btnMod.click(loadmodel, inputs=[modelstrs], outputs = [sid])
|
206 |
with gr.Row():
|
207 |
slice_db = gr.Slider(label="切片阈值(较嘈杂时-30,保留呼吸声时-50)",maximum=-30, minimum=-70, step=1, value=-40)
|
208 |
vc_transform = gr.Slider(label="变调(整数,可以正负,半音数量,升高八度就是12)",maximum=16, minimum=-16, step=1, value=0)
|
209 |
+
f0_predictor = gr.Radio(label="f0预测器(如遇哑音可以尝试更换f0)凭干声干净程度选择。只推荐fcpe音色最像或rmvpe音最准", choices=["pm","dio","harvest","fcpe","rmvpe"], value="fcpe")
|
210 |
with gr.Row():
|
211 |
cluster_ratio = gr.Slider(label="聚类模型混合比例,0-1之间,默认为0不启用聚类,能提升音色相似度,但会导致咬字下降(如果使用建议0.5左右)",maximum=1, minimum=0, step=0.1, value=0)
|
212 |
output_format = gr.Radio(label="音频输出格式(MP3会导致时间轴多27ms,需合成请选flac)", choices=["flac", "mp3"], value = "mp3")#格式
|