Mahiruoshi committed on
Commit
0a949ac
1 Parent(s): e9880c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +234 -21
app.py CHANGED
@@ -1,8 +1,5 @@
1
  # flake8: noqa: E402
2
-
3
- import sys, os
4
  import logging
5
-
6
  logging.getLogger("numba").setLevel(logging.WARNING)
7
  logging.getLogger("markdown_it").setLevel(logging.WARNING)
8
  logging.getLogger("urllib3").setLevel(logging.WARNING)
@@ -16,6 +13,14 @@ logger = logging.getLogger(__name__)
16
  import datetime
17
  import numpy as np
18
  import torch
 
 
 
 
 
 
 
 
19
  import argparse
20
  import commons
21
  import utils
@@ -26,7 +31,7 @@ from text.cleaner import clean_text
26
  import gradio as gr
27
  import webbrowser
28
  import re
29
-
30
  net_g = None
31
  BandList = {
32
  "PoppinParty":["香澄","有咲","たえ","りみ","沙綾"],
@@ -36,8 +41,7 @@ BandList = {
36
  "Roselia":["友希那","紗夜","リサ","燐子","あこ"],
37
  "RaiseASuilen":["レイヤ","ロック","ますき","チュチュ","パレオ"],
38
  "Morfonica":["ましろ","瑠唯","つくし","七深","透子"],
39
- "MyGo":["燈","愛音","そよ","立希","楽奈"],
40
- "AveMujica(初华和喵梦没法用)":["祥子","睦","海鈴","初華","にゃむ"],
41
  }
42
 
43
  if sys.platform == "darwin" and torch.backends.mps.is_available():
@@ -74,7 +78,6 @@ def extrac(text):
74
  final_list.append(i)
75
  '''
76
  final_list = [x for x in final_list if x != '']
77
- print(final_list)
78
  return final_list
79
 
80
  def get_text(text, language_str, hps):
@@ -183,15 +186,168 @@ def tts_fn(
183
  sid=speaker,
184
  language= "JP" if is_japanese(text) else "ZH",
185
  )
186
- print(sentence)
187
  audio_fin.append(audio)
188
  return (hps.data.sampling_rate, np.concatenate(audio_fin))
189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
 
191
  if __name__ == "__main__":
192
  parser = argparse.ArgumentParser()
193
  parser.add_argument(
194
- "-m", "--model", default="./logs/BangDream/G_17000.pth", help="path of your model"
195
  )
196
  parser.add_argument(
197
  "-c",
@@ -235,7 +391,13 @@ if __name__ == "__main__":
235
  speaker_ids = hps.data.spk2id
236
  speakers = list(speaker_ids.keys())
237
  languages = ["ZH", "JP"]
 
 
 
238
  with gr.Blocks() as app:
 
 
 
239
  for band in BandList:
240
  with gr.TabItem(band):
241
  for name in BandList[band]:
@@ -251,27 +413,27 @@ if __name__ == "__main__":
251
  LongSentence = gr.Checkbox(value=True, label="Generate LongSentence")
252
  with gr.Column():
253
  text = gr.TextArea(
254
- label="Text",
255
- placeholder="Input Text Here",
256
  value="純粋な日本語または中国語を入力してください。",
257
  )
258
- btn = gr.Button("Generate!", variant="primary")
259
  audio_output = gr.Audio(label="Output Audio")
260
- with gr.Accordion(label="Setting", open=False):
261
  sdp_ratio = gr.Slider(
262
- minimum=0, maximum=1, value=0.2, step=0.01, label="SDP Ratio"
263
  )
264
  noise_scale = gr.Slider(
265
- minimum=0.1, maximum=2, value=0.6, step=0.01, label="Noise Scale"
266
  )
267
  noise_scale_w = gr.Slider(
268
- minimum=0.1, maximum=2, value=0.8, step=0.01, label="Noise Scale W"
269
  )
270
  length_scale = gr.Slider(
271
- minimum=0.1, maximum=2, value=1, step=0.01, label="Length Scale"
272
  )
273
  speaker = gr.Dropdown(
274
- choices=speakers, value=name, label="Speaker"
275
  )
276
  btn.click(
277
  tts_fn,
@@ -284,7 +446,58 @@ if __name__ == "__main__":
284
  length_scale,
285
  LongSentence,
286
  ],
287
- outputs=[ audio_output],
288
  )
289
-
290
- app.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # flake8: noqa: E402
 
 
2
  import logging
 
3
  logging.getLogger("numba").setLevel(logging.WARNING)
4
  logging.getLogger("markdown_it").setLevel(logging.WARNING)
5
  logging.getLogger("urllib3").setLevel(logging.WARNING)
 
13
  import datetime
14
  import numpy as np
15
  import torch
16
+ from ebooklib import epub
17
+ import PyPDF2
18
+ from PyPDF2 import PdfReader
19
+ import zipfile
20
+ import shutil
21
+ import sys, os
22
+ import json
23
+ from bs4 import BeautifulSoup
24
  import argparse
25
  import commons
26
  import utils
 
31
  import gradio as gr
32
  import webbrowser
33
  import re
34
+ from scipy.io.wavfile import write
35
  net_g = None
36
  BandList = {
37
  "PoppinParty":["香澄","有咲","たえ","りみ","沙綾"],
 
41
  "Roselia":["友希那","紗夜","リサ","燐子","あこ"],
42
  "RaiseASuilen":["レイヤ","ロック","ますき","チュチュ","パレオ"],
43
  "Morfonica":["ましろ","瑠唯","つくし","七深","透子"],
44
+ "MyGo&AveMujica(Part)":["燈","愛音","そよ","立希","楽奈","祥子","睦","海鈴"],
 
45
  }
46
 
47
  if sys.platform == "darwin" and torch.backends.mps.is_available():
 
78
  final_list.append(i)
79
  '''
80
  final_list = [x for x in final_list if x != '']
 
81
  return final_list
82
 
83
  def get_text(text, language_str, hps):
 
186
  sid=speaker,
187
  language= "JP" if is_japanese(text) else "ZH",
188
  )
 
189
  audio_fin.append(audio)
190
  return (hps.data.sampling_rate, np.concatenate(audio_fin))
191
 
192
def split_into_sentences(text):
    """Split text into sentences at Chinese/Japanese sentence-ending punctuation.

    A boundary is placed after any of ``。 ! ? ! ? …`` or a newline. A run of
    consecutive terminators (for example ``……``, ``?!`` or several blank
    lines) stays attached to the sentence it ends instead of being emitted
    as separate punctuation-only fragments.

    Args:
        text: The raw text to split.

    Returns:
        A list of non-empty sentences with surrounding whitespace removed.
    """
    # The lookbehind splits after a terminator; the lookahead refuses to
    # split in the middle of a run of terminators.
    pieces = re.split(r'(?<=[。!?!?…\n])(?![。!?!?…\n])', text)
    # Strip first and filter afterwards, so whitespace-only pieces are
    # dropped rather than returned as empty strings.
    stripped = (piece.strip() for piece in pieces)
    return [sentence for sentence in stripped if sentence]
196
+
197
+
198
def seconds_to_ass_time(seconds):
    """Convert an offset in seconds to an ASS subtitle timestamp.

    Args:
        seconds: Offset in seconds (int or float). Negative values are
            clamped to zero.

    Returns:
        A string of the form ``H:MM:SS.CC`` where ``CC`` is centiseconds.
    """
    # Do all arithmetic in whole centiseconds. Truncating ``seconds`` to an
    # int before extracting the fraction would always give a ".00" suffix.
    total_centis = max(0, int(round(seconds * 100)))
    hours, remainder = divmod(total_centis, 360000)
    minutes, remainder = divmod(remainder, 6000)
    secs, centis = divmod(remainder, 100)
    return "{:01d}:{:02d}:{:02d}.{:02d}".format(hours, minutes, secs, centis)
205
+
206
def generate_audio_and_srt_for_group(group, outputPath, group_index, sampling_rate, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale,spealerList,silenceTime):
    """Synthesize one group of sentences into a WAV file plus an ASS subtitle file.

    Each sentence may carry a ``name|text`` prefix. The name selects the
    voice: directly when it is a key of ``hps.data.spk2id``, or through the
    mapping in ``spealerList``. A sentence without a recognised name keeps
    the voice chosen for the previous sentence, so continuation sentences
    stay with the same speaker.

    Args:
        group: Iterable of sentence strings.
        outputPath: Directory that receives ``audiobook_part_<n>.wav`` and
            ``audiobook_part_<n>.ass``.
        group_index: Number used in the output file names.
        sampling_rate: Rate used for the WAV file, the inserted silence and
            the subtitle timing.
        speaker: Voice used until a sentence selects another one.
        sdp_ratio, noise_scale, noise_scale_w, length_scale: Passed through
            to ``tts_fn``.
        spealerList: Newline-separated ``model_speaker|name_in_text`` lines.
            Lines without a ``|`` are ignored.
        silenceTime: Seconds of silence appended after every sentence.

    Returns:
        ``(hps.data.sampling_rate, audio)`` where ``audio`` is the
        concatenated waveform of the group (empty if nothing was synthesized).
    """
    ass_header = """[Script Info]
; Script generated by OpenAI Assistant
Title: Audiobook
ScriptType: v4.00+
WrapStyle: 0
PlayResX: 640
PlayResY: 360
ScaledBorderAndShadow: yes

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,1,1,2,10,10,10,1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""
    log = logging.getLogger(__name__)
    known_speakers = set(hps.data.spk2id.keys())

    # Parse the mapping once. Lines without "|" (blank lines or a placeholder
    # value) are skipped instead of raising IndexError for every sentence.
    alias_to_speaker = {}
    for entry in spealerList.split("\n"):
        fields = entry.split("|")
        if len(fields) >= 2:
            alias_to_speaker[fields[1]] = fields[0]

    # The silence must be sized with the same rate that is used for timing.
    silence_frames = int(silenceTime * sampling_rate)

    audio_fin = []
    ass_entries = []
    start_time = 0

    for sentence in group:
        try:
            fake_speaker = sentence.split("|")[0]
            log.debug("sentence=%r speaker_tag=%r", sentence, fake_speaker)
            if fake_speaker in known_speakers:
                speaker = fake_speaker
            # An explicit mapping takes precedence over a direct name match.
            speaker = alias_to_speaker.get(fake_speaker, speaker)

            _, audio = tts_fn(sentence.split("|")[-1], speaker=speaker, sdp_ratio=sdp_ratio, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale, LongSentence=True)
        except Exception:
            # Best effort: one bad sentence must not abort the whole group,
            # but the failure is recorded instead of being hidden.
            log.exception("Skipping sentence that could not be synthesized: %r", sentence)
            continue

        audio_fin.append(audio)
        audio_fin.append(np.zeros((silence_frames,), dtype=audio.dtype))

        duration = len(audio) / sampling_rate
        end_time = start_time + duration + silenceTime
        ass_entries.append(
            "Dialogue: 0,{},{},Default,,0,0,0,,{}".format(
                seconds_to_ass_time(start_time),
                seconds_to_ass_time(end_time),
                sentence.replace("|", ":"),
            )
        )
        start_time = end_time

    # np.concatenate rejects an empty list; fall back to an empty waveform.
    # NOTE(review): float32 is an assumed dtype for the empty case - confirm
    # it matches what tts_fn returns.
    if audio_fin:
        merged = np.concatenate(audio_fin)
    else:
        merged = np.zeros((0,), dtype=np.float32)

    wav_filename = os.path.join(outputPath, f'audiobook_part_{group_index}.wav')
    ass_filename = os.path.join(outputPath, f'audiobook_part_{group_index}.ass')

    write(wav_filename, sampling_rate, merged)

    with open(ass_filename, 'w', encoding='utf-8') as f:
        f.write(ass_header + '\n'.join(ass_entries))
    return (hps.data.sampling_rate, merged)
261
def extract_text_from_epub(file_path):
    """Return the text of every HTML item in an EPUB, joined by newlines.

    The book is read with ``epub.read_epub``; each item that is an
    ``epub.EpubHtml`` is parsed with BeautifulSoup's ``html.parser`` and
    reduced to its text content. Other item types are ignored.
    """
    html_items = (
        entry for entry in epub.read_epub(file_path).items
        if isinstance(entry, epub.EpubHtml)
    )
    return '\n'.join(
        BeautifulSoup(entry.content, 'html.parser').get_text()
        for entry in html_items
    )
269
+
270
def extract_text_from_pdf(file_path):
    """Return the extracted text of every page of a PDF, joined by newlines.

    The file is opened in binary mode and read through ``PdfReader``; each
    page contributes the result of its ``extract_text()`` call.
    """
    page_texts = []
    with open(file_path, 'rb') as pdf_handle:
        for page in PdfReader(pdf_handle).pages:
            page_texts.append(page.extract_text())
    return '\n'.join(page_texts)
275
+
276
def extract_text_from_game(data):
    """Collect ``name|body`` dialogue lines from a nested script structure.

    The structure (dicts and lists, as produced by ``json.load``) is walked
    depth first. Every dict may set ``windowDisplayName``, ``body`` (with
    newlines removed) and ``voiceId``; values not set by a dict are inherited
    from its ancestors. A line is emitted for each dict at which all three
    values are truthy, the display name contains no "・", and the body has
    at least one word character.

    Returns:
        The emitted lines joined by newlines ("" when nothing matched).
    """
    required_keys = ['windowDisplayName', 'body', 'voiceId']
    collected = []

    def _walk(node, inherited=None):
        fields = {} if inherited is None else inherited

        if isinstance(node, list):
            for element in node:
                # Each child gets its own copy so siblings cannot affect
                # one another.
                _walk(element, dict(fields))
            return
        if not isinstance(node, dict):
            return

        if 'windowDisplayName' in node:
            fields['windowDisplayName'] = node['windowDisplayName']
        if 'body' in node:
            fields['body'] = node['body'].replace('\n', '')
        if 'voiceId' in node:
            fields['voiceId'] = node['voiceId']

        has_all_fields = all(fields.get(key) for key in required_keys)
        name_is_single = "・" not in fields.get('windowDisplayName', "")
        body_has_words = bool(re.sub(r'[^\w]', '', fields.get('body', "")))

        if has_all_fields and name_is_single and body_has_words:
            collected.append(f"{fields['windowDisplayName']}|{fields['body']}")

        for child in node.values():
            _walk(child, dict(fields))

    _walk(data)
    return '\n'.join(collected)
309
+
310
def extract_text_from_file(inputFile):
    """Extract text from a file, choosing the reader by its extension.

    Supported extensions (case-insensitive): ``.epub``, ``.pdf``, ``.txt``
    (read as UTF-8) and ``.asset`` (UTF-8 JSON passed to
    ``extract_text_from_game``).

    Raises:
        ValueError: If the extension is not one of the supported ones.
    """
    _, suffix = os.path.splitext(inputFile)
    suffix = suffix.lower()

    if suffix == ".epub":
        return extract_text_from_epub(inputFile)
    if suffix == ".pdf":
        return extract_text_from_pdf(inputFile)
    if suffix == ".txt":
        with open(inputFile, 'r', encoding='utf-8') as handle:
            return handle.read()
    if suffix == ".asset":
        with open(inputFile, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
        return extract_text_from_game(parsed)

    raise ValueError(f"Unsupported file format: {suffix}")
326
+
327
def audiobook(inputFile, groupsize, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale,spealerList,silenceTime):
    """Turn an uploaded file into audio parts under the ``books`` directory.

    The ``books`` directory is recreated on every call. The text extracted
    from the file is split into sentences, which are synthesized in groups
    of ``groupsize`` by ``generate_audio_and_srt_for_group``.

    Args:
        inputFile: Uploaded file object; only its ``name`` (a path) is used.
        groupsize: Maximum number of sentences per output part.
        speaker: Default voice.
        sdp_ratio, noise_scale, noise_scale_w, length_scale: Synthesis
            settings passed through unchanged.
        spealerList: Speaker mapping text; an empty string is replaced by
            the placeholder "无".
        silenceTime: Seconds of silence between sentences.

    Returns:
        The ``(sampling_rate, audio)`` result of the first group when CUDA
        is unavailable (only that group is generated), otherwise the result
        of the last group. ``None`` if the file contained no sentences.
    """
    directory_path = "books"

    # Every run starts from an empty output directory.
    if os.path.exists(directory_path):
        shutil.rmtree(directory_path)
    os.makedirs(directory_path)

    sentences = split_into_sentences(extract_text_from_file(inputFile.name))

    # UI sliders may deliver floats; range() and slicing need a positive int.
    group_size = max(1, int(groupsize))
    if spealerList == "":
        spealerList = "无"

    # Stays None when there is nothing to synthesize, instead of raising
    # UnboundLocalError at the final return.
    result = None
    for group_index, offset in enumerate(range(0, len(sentences), group_size), start=1):
        group = sentences[offset:offset + group_size]
        result = generate_audio_and_srt_for_group(group,directory_path, group_index, 44100, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale,spealerList,silenceTime)
        # Without CUDA only the first part is generated and returned.
        if not torch.cuda.is_available():
            return result
    return result
346
 
347
  if __name__ == "__main__":
348
  parser = argparse.ArgumentParser()
349
  parser.add_argument(
350
+ "-m", "--model", default="./logs/BangDream/G_45000.pth", help="path of your model"
351
  )
352
  parser.add_argument(
353
  "-c",
 
391
  speaker_ids = hps.data.spk2id
392
  speakers = list(speaker_ids.keys())
393
  languages = ["ZH", "JP"]
394
+ examples = [
395
+ ["filelist/Scenarioband6-018.asset", 500, "つくし", "ましろ|真白\n七深|七深\n透子|透子\nつくし|筑紫\n瑠唯|瑠唯\nそよ|素世\n祥子|祥子", "扩展功能"],
396
+ ]
397
  with gr.Blocks() as app:
398
+ gr.Markdown(
399
+ '# Bang Dream全员TTS,使用本模型请严格遵守法律法规!\n发布二创作品请标注本项目作者及链接、作品使用Bert-VITS2 AI生成!'
400
+ )
401
  for band in BandList:
402
  with gr.TabItem(band):
403
  for name in BandList[band]:
 
413
  LongSentence = gr.Checkbox(value=True, label="Generate LongSentence")
414
  with gr.Column():
415
  text = gr.TextArea(
416
+ label="输入纯日语或者中文",
417
+ placeholder="输入纯日语或者中文",
418
  value="純粋な日本語または中国語を入力してください。",
419
  )
420
+ btn = gr.Button("点击生成", variant="primary")
421
  audio_output = gr.Audio(label="Output Audio")
422
+ with gr.Accordion(label="TTS设定", open=False):
423
  sdp_ratio = gr.Slider(
424
+ minimum=0, maximum=1, value=0.2, step=0.01, label="SDP/DP混合比"
425
  )
426
  noise_scale = gr.Slider(
427
+ minimum=0.1, maximum=2, value=0.6, step=0.01, label="感情调节"
428
  )
429
  noise_scale_w = gr.Slider(
430
+ minimum=0.1, maximum=2, value=0.8, step=0.01, label="音素长度"
431
  )
432
  length_scale = gr.Slider(
433
+ minimum=0.1, maximum=2, value=1, step=0.01, label="生成长度"
434
  )
435
  speaker = gr.Dropdown(
436
+ choices=speakers, value=name, label="说话人"
437
  )
438
  btn.click(
439
  tts_fn,
 
446
  length_scale,
447
  LongSentence,
448
  ],
449
+ outputs=[audio_output],
450
  )
451
+ for i in examples:
452
+ with gr.Tab(i[-1]):
453
+ with gr.Row():
454
+ with gr.Column():
455
+ gr.Markdown(
456
+ f"从 <a href='filelists'>filelists文件夹</a> 下载示例\n游戏脚本见<a href='https://bestdori.com/tool/explorer/asset/cn/scenario'>bestdori</a>"
457
+ )
458
+ inputFile = gr.inputs.File(label="上传游戏脚本(日文)、中文脚本(需设置角色对应关系)、自制文、(需设置角色对应关系")
459
+ groupSize = gr.Slider(
460
+ minimum=10, maximum=1000,value = i[1], step=1, label="当个音频文件包含的最大字数"
461
+ )
462
+ silenceTime = gr.Slider(
463
+ minimum=0, maximum=1, value=0.5, step=0.1, label="句子的间隔"
464
+ )
465
+ spealerList = gr.TextArea(
466
+ label="角色对应表",
467
+ placeholder="左边是你想要在每一句话合成中用到的speaker(见角色清单)右边是你上传文本时分隔符左边设置的说话人:{ChoseSpeakerFromConfigList1}|{SeakerInUploadText1}\n{ChoseSpeakerFromConfigList2}|{SeakerInUploadText2}\n{ChoseSpeakerFromConfigList3}|{SeakerInUploadText3}\n",
468
+ value = i[3],
469
+ )
470
+ speaker = gr.Dropdown(
471
+ choices=speakers, value = i[2], label="角色清单"
472
+ )
473
+ with gr.Column():
474
+ sdp_ratio = gr.Slider(
475
+ minimum=0, maximum=1, value=0.2, step=0.01, label="SDP/DP混合比"
476
+ )
477
+ noise_scale = gr.Slider(
478
+ minimum=0.1, maximum=2, value=0.6, step=0.01, label="感情调节"
479
+ )
480
+ noise_scale_w = gr.Slider(
481
+ minimum=0.1, maximum=2, value=0.8, step=0.01, label="音素长度"
482
+ )
483
+ length_scale = gr.Slider(
484
+ minimum=0.1, maximum=2, value=1, step=0.01, label="生成长度"
485
+ )
486
+ LastAudioOutput = gr.Audio(label="当用cuda在本地运行时才能在book文件夹下浏览全部合成内容")
487
+ btn2 = gr.Button("点击生成", variant="primary")
488
+ btn2.click(
489
+ audiobook,
490
+ inputs=[
491
+ inputFile,
492
+ groupSize,
493
+ speaker,
494
+ sdp_ratio,
495
+ noise_scale,
496
+ noise_scale_w,
497
+ length_scale,
498
+ spealerList,
499
+ silenceTime
500
+ ],
501
+ outputs=[LastAudioOutput],
502
+ )
503
+ app.launch()