XzJosh committed on
Commit
0d9b885
1 Parent(s): f65fb3e

Upload 75 files

Browse files
Files changed (2) hide show
  1. .gitattributes +0 -13
  2. app.py +9 -9
.gitattributes CHANGED
@@ -32,16 +32,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  **/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
33
  *.safetensors filter=lfs diff=lfs merge=lfs -text
34
  *.ckpt filter=lfs diff=lfs merge=lfs -texttext/cmudict_cache.pickle filter=lfs diff=lfs merge=lfs -text
35
- audio/Azuma/Azuma_102.wav filter=lfs diff=lfs merge=lfs -text
36
- audio/Azuma/Azuma_148.wav filter=lfs diff=lfs merge=lfs -text
37
- audio/Azuma/Azuma_163.wav filter=lfs diff=lfs merge=lfs -text
38
- audio/Azuma/Azuma_173.wav filter=lfs diff=lfs merge=lfs -text
39
- audio/Azuma/Azuma_242.wav filter=lfs diff=lfs merge=lfs -text
40
- audio/Azuma/Azuma_516.wav filter=lfs diff=lfs merge=lfs -text
41
- audio/Azuma/Azuma_623.wav filter=lfs diff=lfs merge=lfs -text
42
- audio/Azuma/Azuma_664.wav filter=lfs diff=lfs merge=lfs -text
43
- audio/Taffy/t2~1_234.wav filter=lfs diff=lfs merge=lfs -text
44
- audio/Taffy/t2~1_260.wav filter=lfs diff=lfs merge=lfs -text
45
- audio/Taffy/Taffy_242.wav filter=lfs diff=lfs merge=lfs -text
46
- audio/Taffy/Taffy_250.wav filter=lfs diff=lfs merge=lfs -text
47
- text/cmudict_cache.pickle filter=lfs diff=lfs merge=lfs -text
 
32
  **/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
33
  *.safetensors filter=lfs diff=lfs merge=lfs -text
34
  *.ckpt filter=lfs diff=lfs merge=lfs -texttext/cmudict_cache.pickle filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -10,9 +10,9 @@ logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
10
  import pdb
11
 
12
  gpt_path = os.environ.get(
13
- "gpt_path", "models/Taffy/Taffy-e5.ckpt"
14
  )
15
- sovits_path = os.environ.get("sovits_path", "models/Taffy/Taffy_e20_s1020.pth")
16
  cnhubert_base_path = os.environ.get(
17
  "cnhubert_base_path", "pretrained_models/chinese-hubert-base"
18
  )
@@ -61,7 +61,7 @@ def get_bert_feature(text, word2ph):
61
  with torch.no_grad():
62
  inputs = tokenizer(text, return_tensors="pt")
63
  for i in inputs:
64
- inputs[i] = inputs[i].to(device) #####输入是long不用管精度问题,精度随bert_model
65
  res = bert_model(**inputs, output_hidden_states=True)
66
  res = torch.cat(res["hidden_states"][-3:-2], -1)[0].cpu()[1:-1]
67
  assert len(word2ph) == len(text)
@@ -446,17 +446,17 @@ def load_audio_text_mappings(folder_path, list_file_name):
446
  audio_to_text_mappings[audio_file_path] = text
447
  return text_to_audio_mappings, audio_to_text_mappings
448
 
449
- audio_folder_path = 'audio/Taffy'
450
- text_to_audio_mappings, audio_to_text_mappings = load_audio_text_mappings(audio_folder_path, 'Taffy.list')
451
 
452
  with gr.Blocks(title="GPT-SoVITS WebUI") as app:
453
  gr.Markdown(value="""
454
- # <center>【AI塔菲】在线语音生成(GPT-SoVITS)\n
455
 
456
  ### <center>模型作者:Xz乔希 https://space.bilibili.com/5859321\n
457
- ### <center>GPT-SoVITS在线合集:https://www.modelscope.cn/studios/xzjosh/GPT-SoVITS\n
458
  ### <center>数据集下载:https://huggingface.co/datasets/XzJosh/audiodataset\n
459
- ### <center>声音归属:永雏塔菲 https://space.bilibili.com/1265680561\n
460
  ### <center>GPT-SoVITS项目:https://github.com/RVC-Boss/GPT-SoVITS\n
461
  ### <center>使用本模型请严格遵守法律法规!发布二创作品请标注本项目作者及链接、作品使用GPT-SoVITS AI生成!\n
462
  ### <center>⚠️在线端不稳定且生成速度较慢,强烈建议下载模型本地推理!\n
@@ -482,7 +482,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
482
  prompt_language = gr.Dropdown(
483
  label="参考音频语种", choices=["中文", "英文", "日文"], value="中文"
484
  )
485
- gr.Markdown(value="*请填写需要合成的目标文本")
486
  with gr.Row():
487
  text = gr.Textbox(label="需要合成的文本", value="")
488
  text_language = gr.Dropdown(
 
10
  import pdb
11
 
12
  gpt_path = os.environ.get(
13
+ "gpt_path", "models/XingTong/XingTong-e10.ckpt"
14
  )
15
+ sovits_path = os.environ.get("sovits_path", "models/XingTong/XingTong_e40_s3440.pth")
16
  cnhubert_base_path = os.environ.get(
17
  "cnhubert_base_path", "pretrained_models/chinese-hubert-base"
18
  )
 
61
  with torch.no_grad():
62
  inputs = tokenizer(text, return_tensors="pt")
63
  for i in inputs:
64
+ inputs[i] = inputs[i].to(device)
65
  res = bert_model(**inputs, output_hidden_states=True)
66
  res = torch.cat(res["hidden_states"][-3:-2], -1)[0].cpu()[1:-1]
67
  assert len(word2ph) == len(text)
 
446
  audio_to_text_mappings[audio_file_path] = text
447
  return text_to_audio_mappings, audio_to_text_mappings
448
 
449
+ audio_folder_path = 'audio/XingTong'
450
+ text_to_audio_mappings, audio_to_text_mappings = load_audio_text_mappings(audio_folder_path, 'XingTong.list')
451
 
452
  with gr.Blocks(title="GPT-SoVITS WebUI") as app:
453
  gr.Markdown(value="""
454
+ # <center>【AI星瞳】在线语音生成(GPT-SoVITS)\n
455
 
456
  ### <center>模型作者:Xz乔希 https://space.bilibili.com/5859321\n
457
+ ### <center>【GPT-SoVITS】在线合集:https://www.modelscope.cn/studios/xzjosh/GPT-SoVITS\n
458
  ### <center>数据集下载:https://huggingface.co/datasets/XzJosh/audiodataset\n
459
+ ### <center>声音归属:星瞳_Official https://space.bilibili.com/401315430\n
460
  ### <center>GPT-SoVITS项目:https://github.com/RVC-Boss/GPT-SoVITS\n
461
  ### <center>使用本模型请严格遵守法律法规!发布二创作品请标注本项目作者及链接、作品使用GPT-SoVITS AI生成!\n
462
  ### <center>⚠️在线端不稳定且生成速度较慢,强烈建议下载模型本地推理!\n
 
482
  prompt_language = gr.Dropdown(
483
  label="参考音频语种", choices=["中文", "英文", "日文"], value="中文"
484
  )
485
+ gr.Markdown(value="*请填写需要合成的目标文本,中英混合选中文,日英混合选日文,暂不支持中日混合。")
486
  with gr.Row():
487
  text = gr.Textbox(label="需要合成的文本", value="")
488
  text_language = gr.Dropdown(