nateraw committed on
Commit
f4e8262
1 Parent(s): 92d6048

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path
import json

import torch
import gradio as gr
import librosa
import numpy as np
# BUGFIX: these names were used below but never imported, so the script
# crashed with NameError at import time.
from huggingface_hub import hf_hub_download, list_repo_files
from so_vits_svc_fork.hparams import HParams
# NOTE(review): `Svc` lives at this path in recent so-vits-svc-fork
# releases — confirm against the pinned package version.
from so_vits_svc_fork.inference.core import Svc

##########################################################
# REPLACE THESE VALUES TO CHANGE THE MODEL REPO/CKPT NAME
##########################################################
repo_id = "dog/theovon"
ckpt_name = None  # or specify a ckpt. ex. "G_1257.pth"
##########################################################

# Figure out the latest generator by taking the one with the highest step
# count. Ex. if the repo has: G_0.pth, G_100.pth, G_200.pth, we'd use
# G_200.pth. Checkpoint files are named "G_<step>.pth", so the step is
# the integer after the underscore in the file stem.
if ckpt_name is None:
    ckpt_steps = [
        int(Path(x).stem.split("_")[1])
        for x in list_repo_files(repo_id)
        if x.startswith("G_") and x.endswith(".pth")
    ]
    # max() instead of sorted()[-1]: same result, O(n) instead of O(n log n).
    ckpt_name = f"G_{max(ckpt_steps)}.pth"

# Download the generator weights and the training config from the Hub,
# then build the inference model on GPU when available.
generator_path = hf_hub_download(repo_id, ckpt_name)
config_path = hf_hub_download(repo_id, "config.json")
hparams = HParams(**json.loads(Path(config_path).read_text()))
speakers = list(hparams.spk.keys())
device = "cuda" if torch.cuda.is_available() else "cpu"
model = Svc(net_g_path=generator_path, config_path=config_path, device=device, cluster_model_path=None)
35
+
36
def predict(
    speaker,
    audio,
    transpose: int = 0,
    auto_predict_f0: bool = False,
    cluster_infer_ratio: float = 0,
    noise_scale: float = 0.4,
    f0_method: str = "crepe",
    db_thresh: int = -40,
    pad_seconds: float = 0.5,
    chunk_seconds: float = 0.5,
    absolute_thresh: bool = False,
):
    """Convert a source recording into the target speaker's voice.

    Args:
        speaker: Target speaker key (one of the model's `spk` entries).
        audio: Path to the source audio file (gradio passes a filepath).
        transpose: Pitch shift in semitones applied during conversion.
        auto_predict_f0: Let the model predict F0 instead of tracking it.
        cluster_infer_ratio: Blend ratio for the cluster model (unused here
            since no cluster model is loaded).
        noise_scale: Sampling noise scale for the generator.
        f0_method: Pitch-tracking algorithm name (e.g. "crepe").
        db_thresh: Silence threshold in dB for chunked inference.
        pad_seconds / chunk_seconds / absolute_thresh: Silence-splitting
            parameters forwarded to `infer_silence`.

    Returns:
        Tuple of (sample_rate, waveform) as expected by `gr.Audio` output.
    """
    # Resample the input to the rate the model was trained at.
    source, _ = librosa.load(audio, sr=model.target_sample)
    source = source.astype(np.float32)

    converted = model.infer_silence(
        source,
        speaker=speaker,
        transpose=transpose,
        auto_predict_f0=auto_predict_f0,
        cluster_infer_ratio=cluster_infer_ratio,
        noise_scale=noise_scale,
        f0_method=f0_method,
        db_thresh=db_thresh,
        pad_seconds=pad_seconds,
        chunk_seconds=chunk_seconds,
        absolute_thresh=absolute_thresh,
    )
    return model.target_sample, converted
64
+
65
+
66
# Input widgets, in the same order as `predict`'s leading parameters;
# the remaining parameters keep their defaults.
_inputs = [
    gr.Dropdown(speakers, value=speakers[0], label="Target Speaker"),
    gr.Audio(type="filepath", source="microphone", label="Source Audio"),
    gr.Slider(-12, 12, value=0, step=1, label="Transpose (Semitones)"),
    gr.Checkbox(False, label="Auto Predict F0"),
    gr.Slider(0.0, 1.0, value=0.0, step=0.1, label='cluster infer ratio'),
    gr.Slider(0.0, 1.0, value=0.4, step=0.1, label="noise scale"),
    gr.Dropdown(choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"], value='crepe', label="f0 method"),
]

_description = f"""
This app uses models trained with so-vits-svc-fork to clone your voice. Model currently being used is https://hf.co/{repo_id}. To change the model being served, duplicate the space and update the repo_id in `app.py`.
""".strip()

_article = """
<p style='text-align: center'>
<a href='https://github.com/voicepaw/so-vits-svc-fork' target='_blank'>Github Repo</a>
</p>
"""

# Wire the widgets to `predict`; the single output is (rate, waveform) audio.
demo = gr.Interface(
    predict,
    inputs=_inputs,
    outputs="audio",
    title="Voice Cloning",
    description=_description,
    article=_article,
)

if __name__ == '__main__':
    demo.launch()