litagin committed
Commit cf61580
1 Parent(s): c811c1e

Add GitHub repo and clean

Files changed (1)
  1. app.py +12 -14
app.py CHANGED
@@ -1,14 +1,16 @@
+import asyncio
+import datetime
+import logging
 import os
-import torch
+import time
+import traceback
 
-# os.system("wget -P cvec/ https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt")
+import edge_tts
 import gradio as gr
 import librosa
-import numpy as np
-import logging
+import torch
 from fairseq import checkpoint_utils
-from vc_infer_pipeline import VC
-import traceback
+
 from config import Config
 from lib.infer_pack.models import (
     SynthesizerTrnMs256NSFsid,
@@ -16,15 +18,10 @@ from lib.infer_pack.models import (
     SynthesizerTrnMs768NSFsid,
     SynthesizerTrnMs768NSFsid_nono,
 )
-import asyncio
-import edge_tts
-import time
-import datetime
-
 from rmvpe import RMVPE
+from vc_infer_pipeline import VC
 
 logging.getLogger("fairseq").setLevel(logging.WARNING)
-
 logging.getLogger("numba").setLevel(logging.WARNING)
 logging.getLogger("markdown_it").setLevel(logging.WARNING)
 logging.getLogger("urllib3").setLevel(logging.WARNING)
@@ -82,7 +79,7 @@ def model_data(model_name):
     else:
         net_g = net_g.float()
     vc = VC(tgt_sr, config)
-    n_spk = cpt["config"][-3]
+    # n_spk = cpt["config"][-3]
 
     index_files = [
         f"{model_root}/{model_name}/{f}"
@@ -220,7 +217,8 @@ Input text ➡[(edge-tts)](https://github.com/rany2/edge-tts)➡ Speech mp3 file
 Although the models are trained on Japanese voices and intended for Japanese text, they can also be used with other languages with the corresponding edge-tts speaker (but possibly with a Japanese accent).
 
 Input characters are limited to 280 characters, and the speech audio is limited to 20 seconds in this 🤗 space.
-Run locally for longer audio.
+
+[Visit this GitHub repo](https://github.com/litagin02/rvc-tts-webui) for running locally with your models!
 """
 
 app = gr.Blocks()
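
For context on the first arrow of the pipeline described in the app's docstring (input text ➡ edge-tts ➡ speech mp3 ➡ RVC ➡ converted audio), here is a minimal sketch of how the `edge_tts` import grouped above is typically used. The voice name, file paths, and 16 kHz resample rate are illustrative assumptions, not values taken from app.py:

```python
# Minimal edge-tts sketch: synthesize text to an mp3, then load it for a
# downstream voice-conversion stage. Voice and paths are illustrative only.
import asyncio

import edge_tts
import librosa


async def tts_to_mp3(text: str, voice: str = "ja-JP-NanamiNeural") -> str:
    out_path = "speech.mp3"
    # Communicate streams the synthesized speech; save() writes it to disk.
    await edge_tts.Communicate(text, voice).save(out_path)
    return out_path


if __name__ == "__main__":
    mp3_path = asyncio.run(tts_to_mp3("こんにちは、テストです。"))
    # Assumption: RVC-style pipelines generally expect 16 kHz mono input.
    audio, sr = librosa.load(mp3_path, sr=16000)
    print(sr, audio.shape)
```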
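On the now commented-out `n_spk = cpt["config"][-3]`: in RVC-style generator checkpoints, the `"config"` entry is a list of hyperparameters whose tail carries the speaker count and target sample rate, and the app no longer needs the speaker count. A hedged sketch of that lookup, assuming a checkpoint laid out the way `model_data()` loads them (the file name is a placeholder):

```python
# Hedged sketch of the dropped n_spk lookup; "G_model.pth" stands in for an
# RVC generator checkpoint like the ones model_data() loads.
import torch

cpt = torch.load("G_model.pth", map_location="cpu")
tgt_sr = cpt["config"][-1]  # target sample rate, last entry (still used by app.py)
n_spk = cpt["config"][-3]   # speaker count, third from the end (unused, hence removed)
print(f"sample rate: {tgt_sr}, speakers: {n_spk}")
```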