Mahiruoshi committed on
Commit
763ea6e
1 Parent(s): 661d3c7

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +350 -67
app.py CHANGED
@@ -1,8 +1,5 @@
1
  # flake8: noqa: E402
2
-
3
- import sys, os
4
  import logging
5
-
6
  logging.getLogger("numba").setLevel(logging.WARNING)
7
  logging.getLogger("markdown_it").setLevel(logging.WARNING)
8
  logging.getLogger("urllib3").setLevel(logging.WARNING)
@@ -13,8 +10,17 @@ logging.basicConfig(
13
  )
14
 
15
  logger = logging.getLogger(__name__)
16
-
 
17
  import torch
 
 
 
 
 
 
 
 
18
  import argparse
19
  import commons
20
  import utils
@@ -24,8 +30,21 @@ from text import cleaned_text_to_sequence, get_bert
24
  from text.cleaner import clean_text
25
  import gradio as gr
26
  import webbrowser
27
-
 
 
28
  net_g = None
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  if sys.platform == "darwin" and torch.backends.mps.is_available():
31
  device = "mps"
@@ -33,6 +52,35 @@ if sys.platform == "darwin" and torch.backends.mps.is_available():
33
  else:
34
  device = "cuda"
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  def get_text(text, language_str, hps):
38
  norm_text, phone, tone, word2ph = clean_text(text, language_str)
@@ -99,36 +147,211 @@ def infer(text, sdp_ratio, noise_scale, noise_scale_w, length_scale, sid, langua
99
  .float()
100
  .numpy()
101
  )
 
 
102
  del x_tst, tones, lang_ids, bert, x_tst_lengths, speakers
103
  return audio
104
 
105
 
106
  def tts_fn(
107
- text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale, language
108
  ):
109
- with torch.no_grad():
110
- audio = infer(
111
- text,
112
- sdp_ratio=sdp_ratio,
113
- noise_scale=noise_scale,
114
- noise_scale_w=noise_scale_w,
115
- length_scale=length_scale,
116
- sid=speaker,
117
- language=language,
118
- )
119
- torch.cuda.empty_cache()
120
- return "Success", (hps.data.sampling_rate, audio)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
 
123
  if __name__ == "__main__":
124
  parser = argparse.ArgumentParser()
125
  parser.add_argument(
126
- "-m", "--model", default="./logs/Mygo/G_63000.pth", help="path of your model"
127
  )
128
  parser.add_argument(
129
  "-c",
130
  "--config",
131
- default="./logs/Mygo/config.json",
132
  help="path of your config file",
133
  )
134
  parser.add_argument(
@@ -142,8 +365,6 @@ if __name__ == "__main__":
142
  if args.debug:
143
  logger.info("Enable DEBUG-LEVEL log")
144
  logging.basicConfig(level=logging.DEBUG)
145
- hps = utils.get_hparams_from_file(args.config)
146
-
147
  device = (
148
  "cuda:0"
149
  if torch.cuda.is_available()
@@ -153,6 +374,7 @@ if __name__ == "__main__":
153
  else "cpu"
154
  )
155
  )
 
156
  net_g = SynthesizerTrn(
157
  len(symbols),
158
  hps.data.filter_length // 2 + 1,
@@ -160,65 +382,126 @@ if __name__ == "__main__":
160
  n_speakers=hps.data.n_speakers,
161
  **hps.model,
162
  ).to(device)
163
- _ = net_g.eval()
164
-
165
- _ = utils.load_checkpoint(args.model, net_g, None, skip_optimizer=True)
166
-
167
  speaker_ids = hps.data.spk2id
168
  speakers = list(speaker_ids.keys())
169
  languages = ["ZH", "JP"]
 
 
 
 
 
 
 
170
  with gr.Blocks() as app:
171
- for name in speakers:
172
- with gr.TabItem(name):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  with gr.Row():
174
  with gr.Column():
175
- with gr.Row():
176
- gr.Markdown(
177
- '<div align="center">'
178
- f'<img style="width:auto;height:400px;" src="file/image/{name}.png">'
179
- '</div>'
180
- )
181
- text_output = gr.Textbox(label="Message")
182
- speaker = gr.Dropdown(
183
- choices=speakers, value=name, label="Speaker"
184
  )
185
- text_output = gr.Textbox(label="Message")
186
- language = gr.Dropdown(
187
- choices=languages, value=languages[1], label="Language"
188
  )
189
- with gr.Column():
190
- text = gr.TextArea(
191
- label="Text",
192
- placeholder="Input Text Here",
193
- value="私たちは、一緒にはいられない。",
 
 
194
  )
195
- audio_output = gr.Audio(label="Output Audio")
196
- btn = gr.Button("Generate!", variant="primary")
197
  sdp_ratio = gr.Slider(
198
- minimum=0, maximum=1, value=0.2, step=0.01, label="SDP Ratio"
199
  )
200
  noise_scale = gr.Slider(
201
- minimum=0.1, maximum=2, value=0.6, step=0.01, label="Noise Scale"
202
  )
203
  noise_scale_w = gr.Slider(
204
- minimum=0.1, maximum=2, value=0.8, step=0.01, label="Noise Scale W"
205
  )
206
  length_scale = gr.Slider(
207
- minimum=0.1, maximum=2, value=1, step=0.01, label="Length Scale"
208
  )
209
-
210
- btn.click(
211
- tts_fn,
212
- inputs=[
213
- text,
214
- speaker,
215
- sdp_ratio,
216
- noise_scale,
217
- noise_scale_w,
218
- length_scale,
219
- language,
220
- ],
221
- outputs=[text_output, audio_output],
222
- )
223
-
224
- app.launch()
 
 
 
1
  # flake8: noqa: E402
 
 
2
  import logging
 
3
  logging.getLogger("numba").setLevel(logging.WARNING)
4
  logging.getLogger("markdown_it").setLevel(logging.WARNING)
5
  logging.getLogger("urllib3").setLevel(logging.WARNING)
 
10
  )
11
 
12
  logger = logging.getLogger(__name__)
13
+ import datetime
14
+ import numpy as np
15
  import torch
16
+ from ebooklib import epub
17
+ import PyPDF2
18
+ from PyPDF2 import PdfReader
19
+ import zipfile
20
+ import shutil
21
+ import sys, os
22
+ import json
23
+ from bs4 import BeautifulSoup
24
  import argparse
25
  import commons
26
  import utils
 
30
  from text.cleaner import clean_text
31
  import gradio as gr
32
  import webbrowser
33
+ import re
34
+ from scipy.io.wavfile import write
35
+ from datetime import datetime
36
  net_g = None
37
# Mapping of band name -> member speaker names; each band becomes a UI tab
# and each member a sub-tab in the Gradio interface.
BandList = {
    "PoppinParty": ["香澄", "有咲", "たえ", "りみ", "沙綾"],
    "Afterglow": ["蘭", "モカ", "ひまり", "巴", "つぐみ"],
    "HelloHappyWorld": ["こころ", "美咲", "薫", "花音", "はぐみ"],
    "PastelPalettes": ["彩", "日菜", "千聖", "イヴ", "麻弥"],
    "Roselia": ["友希那", "紗夜", "リサ", "燐子", "あこ"],
    "RaiseASuilen": ["レイヤ", "ロック", "ますき", "チュチュ", "パレオ"],
    "Morfonica": ["ましろ", "瑠唯", "つくし", "七深", "透子"],
    "MyGo&AveMujica(Part)": ["燈", "愛音", "そよ", "立希", "楽奈", "祥子", "睦", "海鈴"],
}
48
 
49
  if sys.platform == "darwin" and torch.backends.mps.is_available():
50
  device = "mps"
 
52
  else:
53
  device = "cuda"
54
 
55
def is_japanese(string):
    """Return True if the string contains any kana (code points strictly
    between U+3040 and U+30FF, i.e. hiragana/katakana)."""
    return any(0x3040 < ord(ch) < 0x30FF for ch in string)
60
+
61
def extrac(text):
    """Strip HTML-like tags and split `text` into short sentences.

    Each input line is whitespace-stripped; lines longer than 20 characters
    are further split on 。/! and each piece gets a trailing 。. Pieces of
    length <= 1 and empty strings are dropped.

    Fixes vs. original: removed the inner loop that shadowed the outer loop
    variable `i`, the bare try/except around `re.split` (which cannot raise
    here), and a dead triple-quoted string used as commented-out code.
    """
    text = re.sub("<[^>]*>", "", text)
    final_list = []
    for line in text.split('\n'):
        line = line.replace('\n', '').replace(' ', '')
        if len(line) <= 1:
            continue
        # Current length limit for a single sentence: 20
        if len(line) > 20:
            for piece in re.split(r'。|!', line):
                if len(piece) > 1:
                    final_list.append(piece + '。')
        else:
            final_list.append(line)
    return [x for x in final_list if x != '']
84
 
85
  def get_text(text, language_str, hps):
86
  norm_text, phone, tone, word2ph = clean_text(text, language_str)
 
147
  .float()
148
  .numpy()
149
  )
150
+ current_time = datetime.now()
151
+ print(str(current_time)+':'+str(sid))
152
  del x_tst, tones, lang_ids, bert, x_tst_lengths, speakers
153
  return audio
154
 
155
 
156
def tts_fn(
    text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale, LongSentence
):
    """Synthesize `text` with the globally loaded model.

    When LongSentence is False the whole text is rendered in a single infer()
    call; otherwise brackets are normalized to <> (stripped by extrac()),
    the text is split into short sentences and per-sentence audio is
    concatenated. Language is auto-detected: "JP" if the text contains kana,
    otherwise "ZH". Returns (sampling_rate, audio).

    Fix vs. original: removed the dead, unused `audiopath = 'voice.wav'`
    assignment in the long-sentence branch.
    """
    if not LongSentence:
        with torch.no_grad():
            audio = infer(
                text,
                sdp_ratio=sdp_ratio,
                noise_scale=noise_scale,
                noise_scale_w=noise_scale_w,
                length_scale=length_scale,
                sid=speaker,
                language="JP" if is_japanese(text) else "ZH",
            )
        torch.cuda.empty_cache()
        return (hps.data.sampling_rate, audio)
    else:
        # Normalize every bracket style to <>, which extrac() removes as markup.
        for opener in ['【', '[', '(', '(']:
            text = text.replace(opener, '<')
        for closer in ['】', ']', ')', ')']:
            text = text.replace(closer, '>')
        final_list = extrac(text.replace('“', '').replace('”', ''))
        audio_fin = []
        for sentence in final_list:
            with torch.no_grad():
                # NOTE: language is detected from the full text, not per
                # sentence, so one utterance keeps a single language.
                audio = infer(
                    sentence,
                    sdp_ratio=sdp_ratio,
                    noise_scale=noise_scale,
                    noise_scale_w=noise_scale_w,
                    length_scale=length_scale,
                    sid=speaker,
                    language="JP" if is_japanese(text) else "ZH",
                )
            audio_fin.append(audio)
        return (hps.data.sampling_rate, np.concatenate(audio_fin))
195
+
196
def split_into_sentences(text):
    """Split text into sentences after Chinese terminal punctuation (。!?…)
    or a newline; drop empty fragments and strip whitespace from the rest."""
    fragments = re.split(r'(?<=[。!?…\n])', text)
    result = []
    for fragment in fragments:
        if fragment:
            result.append(fragment.strip())
    return result
200
+
201
+
202
def seconds_to_ass_time(seconds):
    """Convert a duration in seconds to ASS timestamp format H:MM:SS.cc.

    Fix vs. original: the fractional part was computed AFTER `seconds` had
    already been truncated with int(), so the centisecond field was always
    00. Work in integer centiseconds instead.
    """
    centiseconds = int(seconds * 100)  # truncate to centisecond precision
    hours, rem = divmod(centiseconds, 360000)
    minutes, rem = divmod(rem, 6000)
    secs, cs = divmod(rem, 100)
    return "{:01d}:{:02d}:{:02d}.{:02d}".format(hours, minutes, secs, cs)
209
+
210
def generate_audio_and_srt_for_group(group, outputPath, group_index, sampling_rate, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale, spealerList, silenceTime):
    """Render one group of sentences into a WAV file plus an ASS subtitle file.

    Each sentence may be prefixed "SpeakerName|"; the name is resolved either
    directly through hps.data.spk2id or through the user alias table in
    `spealerList` ("model_speaker|text_speaker" per line). Writes
    audiobook_part_<group_index>.wav/.ass under outputPath and returns
    (hps.data.sampling_rate, concatenated audio).

    Fixes vs. original: silence length used a hard-coded 44010 instead of the
    `sampling_rate` argument; alias lines without "|" (e.g. the default "无")
    raised IndexError inside a bare `except: pass`, silently skipping every
    sentence; the bare except is narrowed and logged; unused `speaker_ids`
    assignment removed.
    """
    audio_fin = []
    ass_entries = []
    start_time = 0

    # Static ASS header; one Dialogue event per sentence is appended below.
    ass_header = """[Script Info]
; Script generated by OpenAI Assistant
Title: Audiobook
ScriptType: v4.00+
WrapStyle: 0
PlayResX: 640
PlayResY: 360
ScaledBorderAndShadow: yes
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,1,1,2,10,10,10,1
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""

    for sentence in group:
        try:
            print(sentence)
            FakeSpeaker = sentence.split("|")[0]
            print(FakeSpeaker)
            if FakeSpeaker in list(hps.data.spk2id.keys()):
                speaker = FakeSpeaker
            for alias_line in re.split('\n', spealerList):
                parts = alias_line.split("|")
                # Guard: skip malformed alias lines instead of raising.
                if len(parts) == 2 and FakeSpeaker == parts[1]:
                    speaker = parts[0]

            _, audio = tts_fn(sentence.split("|")[-1], speaker=speaker, sdp_ratio=sdp_ratio, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale, LongSentence=True)
            # Insert silence between sentences, sized by the real sample rate.
            silence_frames = int(silenceTime * sampling_rate)
            silence_data = np.zeros((silence_frames,), dtype=audio.dtype)
            audio_fin.append(audio)
            audio_fin.append(silence_data)

            duration = len(audio) / sampling_rate
            end_time = start_time + duration + silenceTime
            ass_entries.append("Dialogue: 0,{},{},".format(seconds_to_ass_time(start_time), seconds_to_ass_time(end_time)) + "Default,,0,0,0,,{}".format(sentence.replace("|", ":")))
            start_time = end_time
        except Exception as e:
            # Narrowed from a bare except; log instead of hiding failures.
            print(f"skipping sentence due to error: {e}")

    wav_filename = os.path.join(outputPath, f'audiobook_part_{group_index}.wav')
    ass_filename = os.path.join(outputPath, f'audiobook_part_{group_index}.ass')

    write(wav_filename, sampling_rate, np.concatenate(audio_fin))

    with open(ass_filename, 'w', encoding='utf-8') as f:
        f.write(ass_header + '\n'.join(ass_entries))
    return (hps.data.sampling_rate, np.concatenate(audio_fin))
263
def extract_text_from_epub(file_path):
    """Read an EPUB file and return the visible text of all HTML items,
    joined by newlines."""
    book = epub.read_epub(file_path)
    chapters = [
        BeautifulSoup(item.content, 'html.parser').get_text()
        for item in book.items
        if isinstance(item, epub.EpubHtml)
    ]
    return '\n'.join(chapters)
271
+
272
def extract_text_from_pdf(file_path):
    """Extract and concatenate the text of every page of a PDF file."""
    pages = []
    with open(file_path, 'rb') as handle:
        for page in PdfReader(handle).pages:
            pages.append(page.extract_text())
    return '\n'.join(pages)
277
+
278
def extract_text_from_game2(data):
    """Recursively collect "name|body" dialogue lines from a game-script JSON tree.

    Any dict that has both 'name' and 'body' keys contributes one line
    (newlines inside the body are flattened); nested dicts and lists are
    walked depth-first in insertion order. Returns the lines joined by '\\n'.

    Fix vs. original: removed the `current_data` parameter that was copied on
    every recursive call but never read (dead plumbing).
    """
    current_content = []

    def _extract(node):
        if isinstance(node, dict):
            if 'name' in node and 'body' in node:
                # Flatten embedded newlines so each entry stays on one line.
                body = node['body'].replace('\n', '')
                current_content.append(f"{node['name']}|{body}")
            for value in node.values():
                _extract(value)
        elif isinstance(node, list):
            for item in node:
                _extract(item)

    _extract(data)
    return '\n'.join(current_content)
302
+
303
def extract_text_from_file(inputFile):
    """Dispatch on the file extension and return the file's plain-text content.

    Supports .epub, .pdf, .txt and .asset (JSON game-script) files.
    Raises ValueError for any other extension.

    Fix vs. original: the .asset branch evaluated extract_text_from_game2()
    three times inside a ternary whose branches were identical; it is now
    called once.
    """
    file_extension = os.path.splitext(inputFile)[1].lower()

    if file_extension == ".epub":
        return extract_text_from_epub(inputFile)
    elif file_extension == ".pdf":
        return extract_text_from_pdf(inputFile)
    elif file_extension == ".txt":
        with open(inputFile, 'r', encoding='utf-8') as f:
            return f.read()
    elif file_extension == ".asset":
        # .asset files are JSON; extract "name|body" dialogue lines.
        with open(inputFile, 'r', encoding='utf-8') as f:
            content = json.load(f)
        return extract_text_from_game2(content)
    else:
        raise ValueError(f"Unsupported file format: {file_extension}")
319
+
320
def audiobook(inputFile, groupsize, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale, spealerList, silenceTime):
    """Convert an uploaded text/epub/pdf/asset file into audiobook WAV+ASS parts.

    The extracted text is split into sentences and processed in groups of
    `groupsize`; each group is written to books/audiobook_part_<n>.wav/.ass.
    Returns the (sampling_rate, audio) tuple of the last rendered group —
    or of the first group when CUDA is unavailable, to keep runtime bounded.

    Fixes vs. original: removed the unused `output_path` variable; `result`
    is initialized so an input with no sentences returns None instead of
    raising NameError; the empty-alias default is set once before the loop.
    """
    directory_path = "books"

    # Start from a clean output directory on every run.
    if os.path.exists(directory_path):
        shutil.rmtree(directory_path)
    os.makedirs(directory_path)

    text = extract_text_from_file(inputFile.name)
    sentences = split_into_sentences(text)

    if spealerList == "":
        spealerList = "无"

    result = None
    for i in range(0, len(sentences), groupsize):
        group = sentences[i:i + groupsize]
        result = generate_audio_and_srt_for_group(group, directory_path, i // groupsize + 1, 44100, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale, spealerList, silenceTime)
        # Without CUDA, render only the first group (CPU Spaces latency).
        if not torch.cuda.is_available():
            return result
    return result
339
+
340
def loadmodel(model):
    """Load checkpoint weights from `model` into the global net_g (left in
    eval mode); returns "success" for display in the UI status box."""
    net_g.eval()
    utils.load_checkpoint(model, net_g, None, skip_optimizer=True)
    return "success"
344
 
345
 
346
  if __name__ == "__main__":
347
  parser = argparse.ArgumentParser()
348
  parser.add_argument(
349
+ "-m", "--model", default="./logs/BangDream/G_45000.pth", help="path of your model"
350
  )
351
  parser.add_argument(
352
  "-c",
353
  "--config",
354
+ default="configs/config.json",
355
  help="path of your config file",
356
  )
357
  parser.add_argument(
 
365
  if args.debug:
366
  logger.info("Enable DEBUG-LEVEL log")
367
  logging.basicConfig(level=logging.DEBUG)
 
 
368
  device = (
369
  "cuda:0"
370
  if torch.cuda.is_available()
 
374
  else "cpu"
375
  )
376
  )
377
+ hps = utils.get_hparams_from_file(args.config)
378
  net_g = SynthesizerTrn(
379
  len(symbols),
380
  hps.data.filter_length // 2 + 1,
 
382
  n_speakers=hps.data.n_speakers,
383
  **hps.model,
384
  ).to(device)
385
+ loadmodel(args.model)
 
 
 
386
  speaker_ids = hps.data.spk2id
387
  speakers = list(speaker_ids.keys())
388
  languages = ["ZH", "JP"]
389
+ examples = [
390
+ ["filelist/Scenarioband6-018.asset", 500, "つくし", "ましろ|真白\n七深|七深\n透子|透子\nつくし|筑紫\n瑠唯|瑠唯\nそよ|素世\n祥子|祥子", "扩展功能"],
391
+ ]
392
+ modelPaths = []
393
+ for dirpath, dirnames, filenames in os.walk("./logs/Bangdream/"):
394
+ for filename in filenames:
395
+ modelPaths.append(os.path.join(dirpath, filename))
396
  with gr.Blocks() as app:
397
+ gr.Markdown(
398
+ f"少歌邦邦全员TTS,使用本模型请严格遵守法律法规!\n 发布二创作品请注明项目和本模型作者<a href='https://space.bilibili.com/19874615/'>B站@Mahiroshi</a>及项目链接\n从 <a href='https://nijigaku.top/2023/10/03/BangDreamTTS/'>我的博客站点</a> 查看使用说明</a>"
399
+ )
400
+ for band in BandList:
401
+ with gr.TabItem(band):
402
+ for name in BandList[band]:
403
+ with gr.TabItem(name):
404
+ with gr.Row():
405
+ with gr.Column():
406
+ with gr.Row():
407
+ gr.Markdown(
408
+ '<div align="center">'
409
+ f'<img style="width:auto;height:400px;" src="file/image/{name}.png">'
410
+ '</div>'
411
+ )
412
+ length_scale = gr.Slider(
413
+ minimum=0.1, maximum=2, value=1, step=0.01, label="语速调节"
414
+ )
415
+ with gr.Accordion(label="切换模型(合成中文建议切换为早期模型)", open=False):
416
+ modelstrs = gr.Dropdown(label = "模型", choices = modelPaths, value = modelPaths[0], type = "value")
417
+ btnMod = gr.Button("载入模型")
418
+ statusa = gr.TextArea()
419
+ btnMod.click(loadmodel, inputs=[modelstrs], outputs = [statusa])
420
+ with gr.Column():
421
+ text = gr.TextArea(
422
+ label="输入纯日语或者中文",
423
+ placeholder="输入纯日语或者中文",
424
+ value="有个人躺在地上,哀嚎......\n有个人睡着了,睡在盒子里。\n我要把它打开,看看他的梦是什么。",
425
+ )
426
+ btn = gr.Button("点击生成", variant="primary")
427
+ audio_output = gr.Audio(label="Output Audio")
428
+ with gr.Accordion(label="其它参数设定", open=False):
429
+ sdp_ratio = gr.Slider(
430
+ minimum=0, maximum=1, value=0.2, step=0.01, label="SDP/DP混合比"
431
+ )
432
+ noise_scale = gr.Slider(
433
+ minimum=0.1, maximum=2, value=0.6, step=0.01, label="感情调节"
434
+ )
435
+ noise_scale_w = gr.Slider(
436
+ minimum=0.1, maximum=2, value=0.8, step=0.01, label="音素长度"
437
+ )
438
+ LongSentence = gr.Checkbox(value=True, label="Generate LongSentence")
439
+ speaker = gr.Dropdown(
440
+ choices=speakers, value=name, label="说话人"
441
+ )
442
+ btn.click(
443
+ tts_fn,
444
+ inputs=[
445
+ text,
446
+ speaker,
447
+ sdp_ratio,
448
+ noise_scale,
449
+ noise_scale_w,
450
+ length_scale,
451
+ LongSentence,
452
+ ],
453
+ outputs=[audio_output],
454
+ )
455
+ for i in examples:
456
+ with gr.Tab(i[-1]):
457
  with gr.Row():
458
  with gr.Column():
459
+ gr.Markdown(
460
+ f"从 <a href='https://nijigaku.top/2023/10/03/BangDreamTTS/'>我的博客站点</a> 查看自制galgame使用说明\n</a>"
461
+ )
462
+ inputFile = gr.inputs.File(label="上传txt(可设置角色对应表)、epub或mobi文件")
463
+ groupSize = gr.Slider(
464
+ minimum=10, maximum=1000,value = i[1], step=1, label="当个音频文件包含的最大字数"
 
 
 
465
  )
466
+ silenceTime = gr.Slider(
467
+ minimum=0, maximum=1, value=0.5, step=0.1, label="句子的间隔"
 
468
  )
469
+ spealerList = gr.TextArea(
470
+ label="角色对应表",
471
+ placeholder="左边是你想要在每一句话合成中用到的speaker(见角色清单)右边是你上传文本时分隔符左边设置的说话人:{ChoseSpeakerFromConfigList1}|{SeakerInUploadText1}\n{ChoseSpeakerFromConfigList2}|{SeakerInUploadText2}\n{ChoseSpeakerFromConfigList3}|{SeakerInUploadText3}\n",
472
+ value = i[3],
473
+ )
474
+ speaker = gr.Dropdown(
475
+ choices=speakers, value = i[2], label="选择默认说话人"
476
  )
477
+ with gr.Column():
 
478
  sdp_ratio = gr.Slider(
479
+ minimum=0, maximum=1, value=0.2, step=0.01, label="SDP/DP混合比"
480
  )
481
  noise_scale = gr.Slider(
482
+ minimum=0.1, maximum=2, value=0.6, step=0.01, label="感情调节"
483
  )
484
  noise_scale_w = gr.Slider(
485
+ minimum=0.1, maximum=2, value=0.8, step=0.01, label="音素长度"
486
  )
487
  length_scale = gr.Slider(
488
+ minimum=0.1, maximum=2, value=1, step=0.01, label="生成长度"
489
  )
490
+ LastAudioOutput = gr.Audio(label="当用cuda在本地运行时才能在book文件夹下浏览全部合成内容")
491
+ btn2 = gr.Button("点击生成", variant="primary")
492
+ btn2.click(
493
+ audiobook,
494
+ inputs=[
495
+ inputFile,
496
+ groupSize,
497
+ speaker,
498
+ sdp_ratio,
499
+ noise_scale,
500
+ noise_scale_w,
501
+ length_scale,
502
+ spealerList,
503
+ silenceTime
504
+ ],
505
+ outputs=[LastAudioOutput],
506
+ )
507
+ app.launch()