hritiksdlccorp committed on
Commit
71aa8ed
1 Parent(s): 82837f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -21
app.py CHANGED
@@ -187,13 +187,8 @@ def get_spepc(hps, filename):
187
 
188
 
189
  dict_language = {
190
- ("中文1"): "all_zh",#全部按中文识别
191
- ("English"): "en",#全部按英文识别#######不变
192
- ("日文1"): "all_ja",#全部按日文识别
193
- ("中文"): "zh",#按中英混合识别####不变
194
- ("日本語"): "ja",#按日英混合识别####不变
195
- ("混合"): "auto",#多语种启动切分识别语种
196
- }
197
 
198
 
199
  def splite_en_inf(sentence, language):
@@ -361,7 +356,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
361
  if not duration(ref_wav_path):
362
  return None
363
  if text == '':
364
- wprint("Please enter text to generate/请输入生成文字")
365
  return None
366
  t0 = ttime()
367
  startTime=timer()
@@ -382,8 +377,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
382
  if (prompt_text[-1] not in splits): prompt_text += "。" if prompt_language != "en" else "."
383
  text = text.strip("\n")
384
  if (text[0] not in splits and len(get_first(text)) < 4): text = "。" + text if text_language != "en" else "." + text
385
- #print(("实际输入的参考文本:"), prompt_text)
386
- #print(("📝实际输入的目标文本:"), text)
387
  zero_wav = np.zeros(
388
  int(hps.data.sampling_rate * 0.3),
389
  dtype=np.float16 if is_half == True else np.float32,
@@ -425,7 +419,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
425
  text = cut5(text)
426
  while "\n\n" in text:
427
  text = text.replace("\n\n", "\n")
428
- print(f"🧨实际输入的目标文本(切句后):{text}\n")
429
  texts = text.split("\n")
430
  texts = merge_short_text_in_array(texts, 5)
431
  audio_opt = []
@@ -435,12 +429,12 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
435
  if (len(text.strip()) == 0):
436
  continue
437
  if (text[-1] not in splits): text += "。" if text_language != "en" else "."
438
- print(("\n🎈实际输入的目标文本(每句):"), text)
439
  phones2, word2ph2, norm_text2 = get_cleaned_text_final(text, text_language)
440
  try:
441
  bert2 = get_bert_final(phones2, word2ph2, norm_text2, text_language, device).to(dtype)
442
  except RuntimeError as e:
443
- wprint(f"The input text does not match the language/输入文本与语言不匹配: {e}")
444
  return None
445
  bert = torch.cat([bert1, bert2], 1)
446
 
@@ -481,7 +475,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
481
  .numpy()[0, 0]
482
  )
483
  except RuntimeError as e:
484
- wprint(f"The input text does not match the language/输入文本与语言不匹配: {e}")
485
  return None
486
 
487
  max_audio=np.abs(audio).max()
@@ -583,9 +577,7 @@ def cut5(inp):
583
 
584
 
585
  def custom_sort_key(s):
586
- # 使用正则表达式提取字符串中的数字部分和非数字部分
587
  parts = re.split('(\d+)', s)
588
- # 将数字部分转换为整数,非数字部分保持不变
589
  parts = [int(part) if part.isdigit() else part for part in parts]
590
  return parts
591
 
@@ -602,7 +594,7 @@ def wprint(text):
602
  def lang_detector(text):
603
  min_chars = 5
604
  if len(text) < min_chars:
605
- return "Input text too short/输入文本太短"
606
  try:
607
  detector = Detector(text).language
608
  lang_info = str(detector)
@@ -651,12 +643,12 @@ def trim_text(text,language):
651
 
652
  def duration(audio_file_path):
653
  if not audio_file_path:
654
- wprint("Failed to obtain uploaded audio/未找到音频文件")
655
  return False
656
  try:
657
  audio_duration = librosa.get_duration(filename=audio_file_path)
658
  if not 3 < audio_duration < 10:
659
- wprint("The audio length must be between 3~10 seconds/音频时长须在3~10秒之间")
660
  return False
661
  return True
662
  except FileNotFoundError:
@@ -715,7 +707,7 @@ def clone_voice(user_voice,user_text,user_lang):
715
  if not duration(user_voice):
716
  return None
717
  if user_text == '':
718
- wprint("Please enter text to generate/请输入生成文字")
719
  return None
720
  user_text=trim_text(user_text,user_lang)
721
  time1=timer()
@@ -760,7 +752,6 @@ for model_name, model_info in models.items():
760
 
761
  ##########GRADIO###########
762
 
763
-
764
  with gr.Blocks(theme='Kasien/ali_theme_custom') as app:
765
  gr.HTML('''
766
  <h1 style="font-size: 25px;">Text-to-Speech Generator</h1>
 
187
 
188
 
189
  dict_language = {
190
+ ("English"): "en"
191
+ }
 
 
 
 
 
192
 
193
 
194
  def splite_en_inf(sentence, language):
 
356
  if not duration(ref_wav_path):
357
  return None
358
  if text == '':
359
+ wprint("Please enter text to generate")
360
  return None
361
  t0 = ttime()
362
  startTime=timer()
 
377
  if (prompt_text[-1] not in splits): prompt_text += "。" if prompt_language != "en" else "."
378
  text = text.strip("\n")
379
  if (text[0] not in splits and len(get_first(text)) < 4): text = "。" + text if text_language != "en" else "." + text
380
+
 
381
  zero_wav = np.zeros(
382
  int(hps.data.sampling_rate * 0.3),
383
  dtype=np.float16 if is_half == True else np.float32,
 
419
  text = cut5(text)
420
  while "\n\n" in text:
421
  text = text.replace("\n\n", "\n")
422
+ print(text)
423
  texts = text.split("\n")
424
  texts = merge_short_text_in_array(texts, 5)
425
  audio_opt = []
 
429
  if (len(text.strip()) == 0):
430
  continue
431
  if (text[-1] not in splits): text += "。" if text_language != "en" else "."
432
+ print(text)
433
  phones2, word2ph2, norm_text2 = get_cleaned_text_final(text, text_language)
434
  try:
435
  bert2 = get_bert_final(phones2, word2ph2, norm_text2, text_language, device).to(dtype)
436
  except RuntimeError as e:
437
+ wprint(f"The input text does not match the language: {e}")
438
  return None
439
  bert = torch.cat([bert1, bert2], 1)
440
 
 
475
  .numpy()[0, 0]
476
  )
477
  except RuntimeError as e:
478
+ wprint(f"The input text does not match the language: {e}")
479
  return None
480
 
481
  max_audio=np.abs(audio).max()
 
577
 
578
 
579
  def custom_sort_key(s):
 
580
  parts = re.split('(\d+)', s)
 
581
  parts = [int(part) if part.isdigit() else part for part in parts]
582
  return parts
583
 
 
594
  def lang_detector(text):
595
  min_chars = 5
596
  if len(text) < min_chars:
597
+ return "Input text too short"
598
  try:
599
  detector = Detector(text).language
600
  lang_info = str(detector)
 
643
 
644
  def duration(audio_file_path):
645
  if not audio_file_path:
646
+ wprint("Failed to obtain uploaded audio")
647
  return False
648
  try:
649
  audio_duration = librosa.get_duration(filename=audio_file_path)
650
  if not 3 < audio_duration < 10:
651
+ wprint("The audio length must be between 3~10 seconds")
652
  return False
653
  return True
654
  except FileNotFoundError:
 
707
  if not duration(user_voice):
708
  return None
709
  if user_text == '':
710
+ wprint("Please enter text to generate")
711
  return None
712
  user_text=trim_text(user_text,user_lang)
713
  time1=timer()
 
752
 
753
  ##########GRADIO###########
754
 
 
755
  with gr.Blocks(theme='Kasien/ali_theme_custom') as app:
756
  gr.HTML('''
757
  <h1 style="font-size: 25px;">Text-to-Speech Generator</h1>