Ailyth committed on
Commit
9a035cf
1 Parent(s): c283d94

0227-013453add_models

Browse files
MODELS/Mandarin/TJ/A0003_S004_0_G0001_1.WAV.mp3 ADDED
Binary file (28.8 kB). View file
 
MODELS/Mandarin/TJ/A0003_S004_0_G0001_61.WAV.mp3 ADDED
Binary file (25 kB). View file
 
MODELS/Mandarin/TJ/A0003_S006_0_G0001_189.WAV.mp3 ADDED
Binary file (37.5 kB). View file
 
MODELS/Mandarin/TJ/TJ.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce1e4ac5b13b0f497bea9240fe055268ea8b85dfd36a6bba76719b12a74e6aef
3
+ size 155087677
MODELS/Mandarin/TJ/TJ.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd79ef3c6466b1d604ecae3ccd1d4a1bc26c41b05d80cbe5375acd0a1d1524da
3
+ size 84932551
MODELS/Mandarin/ZZ/A0001_S001_0_G0001_10.WAV.mp3 ADDED
Binary file (39.5 kB). View file
 
MODELS/Mandarin/ZZ/A0001_S003_0_G0001_14.WAV.mp3 ADDED
Binary file (60 kB). View file
 
MODELS/Mandarin/ZZ/A0001_S003_0_G0001_2.WAV.mp3 ADDED
Binary file (30.7 kB). View file
 
MODELS/Mandarin/ZZ/ZZ.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03e23f41526ca33b71026da306259e5bb9b2f3439de4bd9e48a0647bf94065a6
3
+ size 155087378
MODELS/Mandarin/ZZ/ZZ.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5767c5fd9afd85455348bce589b6aafec143a16f87370666e4df1d12031d86db
3
+ size 84931874
app.py CHANGED
@@ -61,8 +61,6 @@ pipe = pipeline(
61
  is_half = eval(
62
  os.environ.get("is_half", "True" if torch.cuda.is_available() else "False")
63
  )
64
- device = "cuda" if torch.cuda.is_available() else "cpu"
65
-
66
 
67
  tokenizer = AutoTokenizer.from_pretrained(bert_path)
68
  bert_model = AutoModelForMaskedLM.from_pretrained(bert_path)
@@ -731,35 +729,35 @@ with gr.Blocks(theme='Kasien/ali_theme_custom') as app:
731
  If you like this space, please click the ❤️ at the top of the page..如喜欢,请点一下页面顶部的❤️<br>
732
  </p>''')
733
 
734
- gr.Markdown("""* This space is based on the text-to-speech generation solution GPT-SoVITS .
735
  You can visit the repo's github homepage to learn training and inference.<br>
736
- 本空间基于文字转语音生成方案 GPT-SoVITS . 你可以前往项目的github主页学习如何推理和训练。
737
  * ⚠️Generating voice is very slow due to using HuggingFace's free CPU in this space.
738
  For faster generation, click the Colab icon below to use this space in Colab,
739
  which will significantly improve the speed.<br>
740
  由于本空间使用huggingface的免费CPU进行推理,因此速度很慢,如想快速生成,请点击下方的Colab图标,
741
  前往Colab使用已获得更快的生成速度。
742
- <br>Colabの使用を強くお勧めします。より速い生成速度が得られます。
743
- * The model's corresponding language is its native language, but in fact,
744
- each model can speak three languages.<br>模型对应的语言是其母语,但实际上,
745
- 每个模型都能说三种语言<br>モデルに対応する言語はその母国語ですが、実際には、各モデルは3つの言語を話すことができます。""")
746
- gr.HTML('''<a href="https://colab.research.google.com/drive/1fTuPZ4tZsAjS-TrhQWMCb7KRdnU8aF6j#scrollTo=MDtJIbLdLHe9" target="_blank"><img src="https://camo.githubusercontent.com/dd83d4a334eab7ada034c13747d9e2237182826d32e3fda6629740b6e02f18d8/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f436f6c61622d4639414230303f7374796c653d666f722d7468652d6261646765266c6f676f3d676f6f676c65636f6c616226636f6c6f723d353235323532" alt="colab"></a>
747
  ''')
748
 
 
 
 
749
  default_voice_wav, default_voice_wav_words, default_language, _, default_model_name, _, default_tone_sample_path = update_model("Trump")
750
  english_models = [name for name, _ in models_by_language["English"]]
751
  chinese_models = [name for name, _ in models_by_language["中文"]]
752
  japanese_models = [name for name, _ in models_by_language["日本語"]]
753
  with gr.Row():
754
  english_choice = gr.Radio(english_models, label="EN|English Model",value="Trump",scale=3)
755
- chinese_choice = gr.Radio(chinese_models, label="CN|中文模型",scale=2)
756
  japanese_choice = gr.Radio(japanese_models, label="JP|日本語モデル",scale=4)
757
 
758
  plsh='Text must match the selected language option to prevent errors, for example, if English is input but Chinese is selected for generation.\n文字一定要和语言选项匹配,不然要报错,比如输入的是英文,生成语言选中文'
759
  limit='Max 70 words. Excess will be ignored./单次最多处理120字左右,多余的会被忽略'
760
 
761
  gr.HTML('''
762
- <b>输入文字</b>''')
763
  with gr.Row():
764
  model_name = gr.Textbox(label="Seleted Model/已选模型", value=default_model_name, scale=1)
765
  text = gr.Textbox(label="Input some text for voice generation/输入想要生成语音的文字", lines=5,scale=8,
@@ -805,14 +803,14 @@ with gr.Blocks(theme='Kasien/ali_theme_custom') as app:
805
 
806
 
807
  gr.HTML('''
808
- <b>开始生成</b>''')
809
  with gr.Row():
810
  main_button = gr.Button("✨Generate Voice", variant="primary", scale=1)
811
  output = gr.Audio(label="💾Download it by clicking ⬇️", scale=3)
812
  #info = gr.Textbox(label="INFO", visible=True, readonly=True, scale=1)
813
 
814
  gr.HTML('''
815
- Generation is slower, please be patient and wait/合成比较慢,请耐心等待<br>
816
  If it generated silence, please try again./如果生成了空白声音,请重试
817
  <br><br><br><br>
818
  <h1 style="font-size: 25px;">Clone custom Voice/克隆自定义声音</h1>
 
61
  is_half = eval(
62
  os.environ.get("is_half", "True" if torch.cuda.is_available() else "False")
63
  )
 
 
64
 
65
  tokenizer = AutoTokenizer.from_pretrained(bert_path)
66
  bert_model = AutoModelForMaskedLM.from_pretrained(bert_path)
 
729
  If you like this space, please click the ❤️ at the top of the page..如喜欢,请点一下页面顶部的❤️<br>
730
  </p>''')
731
 
732
+ gr.Markdown("""* This space is based on the text-to-speech generation solution [GPT-SoVITS](https://github.com/RVC-Boss/GPT-SoVITS) .
733
  You can visit the repo's github homepage to learn training and inference.<br>
734
+ 本空间基于文字转语音生成方案[GPT-SoVITS](https://github.com/RVC-Boss/GPT-SoVITS) . 你可以前往项目的github主页学习如何推理和训练。
735
  * ⚠️Generating voice is very slow due to using HuggingFace's free CPU in this space.
736
  For faster generation, click the Colab icon below to use this space in Colab,
737
  which will significantly improve the speed.<br>
738
  由于本空间使用huggingface的免费CPU进行推理,因此速度很慢,如想快速生成,请点击下方的Colab图标,
739
  前往Colab使用已获得更快的生成速度。
740
+ <br>Colabの使用を強くお勧めします。より速い生成速度が得られます。 """)
741
+ gr.HTML('''<a href="https://colab.research.google.com/drive/1fTuPZ4tZsAjS-TrhQWMCb7KRdnU8aF6j" target="_blank"><img src="https://camo.githubusercontent.com/dd83d4a334eab7ada034c13747d9e2237182826d32e3fda6629740b6e02f18d8/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f436f6c61622d4639414230303f7374796c653d666f722d7468652d6261646765266c6f676f3d676f6f676c65636f6c616226636f6c6f723d353235323532" alt="colab"></a>
 
 
 
742
  ''')
743
 
744
+ gr.Markdown('''* The model's corresponding language is its native language, but in fact,
745
+ each model can speak three languages.<br>模型对应的语言是其母语,但实际上,
746
+ 每个模型都能说三种语言<br>モデルに対応する言語はその母国語ですが、実際には、各モデルは3つの言語を話すことができます。''')
747
  default_voice_wav, default_voice_wav_words, default_language, _, default_model_name, _, default_tone_sample_path = update_model("Trump")
748
  english_models = [name for name, _ in models_by_language["English"]]
749
  chinese_models = [name for name, _ in models_by_language["中文"]]
750
  japanese_models = [name for name, _ in models_by_language["日本語"]]
751
  with gr.Row():
752
  english_choice = gr.Radio(english_models, label="EN|English Model",value="Trump",scale=3)
753
+ chinese_choice = gr.Radio(chinese_models, label="CN|中文模型",scale=3)
754
  japanese_choice = gr.Radio(japanese_models, label="JP|日本語モデル",scale=4)
755
 
756
  plsh='Text must match the selected language option to prevent errors, for example, if English is input but Chinese is selected for generation.\n文字一定要和语言选项匹配,不然要报错,比如输入的是英文,生成语言选中文'
757
  limit='Max 70 words. Excess will be ignored./单次最多处理120字左右,多余的会被忽略'
758
 
759
  gr.HTML('''
760
+ <b>Input text/输入文字</b>''')
761
  with gr.Row():
762
  model_name = gr.Textbox(label="Seleted Model/已选模型", value=default_model_name, scale=1)
763
  text = gr.Textbox(label="Input some text for voice generation/输入想要生成语音的文字", lines=5,scale=8,
 
803
 
804
 
805
  gr.HTML('''
806
+ <b>Start generating/开始生成</b>''')
807
  with gr.Row():
808
  main_button = gr.Button("✨Generate Voice", variant="primary", scale=1)
809
  output = gr.Audio(label="💾Download it by clicking ⬇️", scale=3)
810
  #info = gr.Textbox(label="INFO", visible=True, readonly=True, scale=1)
811
 
812
  gr.HTML('''
813
+ Quickly generate with Colab/使用Colab快速生成:<a href="https://colab.research.google.com/drive/1fTuPZ4tZsAjS-TrhQWMCb7KRdnU8aF6j" target="_blank"><img src="https://camo.githubusercontent.com/dd83d4a334eab7ada034c13747d9e2237182826d32e3fda6629740b6e02f18d8/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f436f6c61622d4639414230303f7374796c653d666f722d7468652d6261646765266c6f676f3d676f6f676c65636f6c616226636f6c6f723d353235323532" alt="colab"></a>
814
  If it generated silence, please try again./如果生成了空白声音,请重试
815
  <br><br><br><br>
816
  <h1 style="font-size: 25px;">Clone custom Voice/克隆自定义声音</h1>
info.py CHANGED
@@ -276,6 +276,54 @@ models = {
276
  },
277
  },
278
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
 
280
  "白い悪魔": {
281
  "gpt_weight": "MODELS/G1/G1.ckpt",
 
276
  },
277
  },
278
 
279
+ "方言腔1": {
280
+ "gpt_weight": "MODELS/Mandarin/TJ/TJ.ckpt",
281
+ "sovits_weight": "MODELS/Mandarin/TJ/TJ.pth",
282
+ "default_language": "中文",
283
+ "tones": {
284
+ "tone1": {
285
+ "sample": "MODELS/Mandarin/TJ/A0003_S004_0_G0001_1.WAV.mp3",
286
+ "example_voice_wav": "MODELS/Mandarin/TJ/A0003_S004_0_G0001_1.WAV.mp3",
287
+ "example_voice_wav_words": "咱今个儿,咱今个儿,咱聊聊这个旅游吧。好吗?",
288
+ },
289
+ "tone2": {
290
+ "sample": "MODELS/Mandarin/TJ/A0003_S004_0_G0001_61.WAV.mp3",
291
+ "example_voice_wav": "MODELS/Mandarin/TJ/A0003_S004_0_G0001_61.WAV.mp3",
292
+ "example_voice_wav_words": "分不出来是早点。午饭。晚饭",
293
+ },
294
+ "tone3": {
295
+ "sample": "MODELS/Mandarin/TJ/A0003_S006_0_G0001_189.WAV.mp3",
296
+ "example_voice_wav": "MODELS/Mandarin/TJ/A0003_S006_0_G0001_189.WAV.mp3",
297
+ "example_voice_wav_words": "你看你看中国这踢球,妈的球到你妈就多少人追,都追不上",
298
+ },
299
+ },
300
+ },
301
+
302
+
303
+ "方言腔2": {
304
+ "gpt_weight": "MODELS/Mandarin/ZZ/ZZ.ckpt",
305
+ "sovits_weight": "MODELS/Mandarin/ZZ/ZZ.pth",
306
+ "default_language": "中文",
307
+ "tones": {
308
+ "tone1": {
309
+ "sample": "MODELS/Mandarin/ZZ/A0001_S001_0_G0001_10.WAV.mp3",
310
+ "example_voice_wav": "MODELS/Mandarin/ZZ/A0001_S001_0_G0001_10.WAV.mp3",
311
+ "example_voice_wav_words": "那你说那小嘞是不是都是谁家盖房子用那一种小嘞。",
312
+ },
313
+ "tone2": {
314
+ "sample": "MODELS/Mandarin/ZZ/A0001_S003_0_G0001_2.WAV.mp3",
315
+ "example_voice_wav": "MODELS/Mandarin/ZZ/A0001_S003_0_G0001_2.WAV.mp3",
316
+ "example_voice_wav_words": "我听人家说,你是,学音乐嘞。",
317
+ },
318
+ "tone3": {
319
+ "sample": "MODELS/Mandarin/ZZ/A0001_S003_0_G0001_14.WAV.mp3",
320
+ "example_voice_wav": "MODELS/Mandarin/ZZ/A0001_S003_0_G0001_14.WAV.mp3",
321
+ "example_voice_wav_words": "它不是管儿,它不是管儿,它就是,嗯,哎呀,不知道咋形容,圆咧。",
322
+ },
323
+ },
324
+ },
325
+
326
+
327
 
328
  "白い悪魔": {
329
  "gpt_weight": "MODELS/G1/G1.ckpt",