Spaces:
Running
Running
File size: 5,307 Bytes
e976963 278bd08 e976963 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
from multiprocessing import cpu_count
from pathlib import Path
import torch
from fairseq import checkpoint_utils
from scipy.io import wavfile
from infer_pack.models import (
SynthesizerTrnMs256NSFsid,
SynthesizerTrnMs256NSFsid_nono,
SynthesizerTrnMs768NSFsid,
SynthesizerTrnMs768NSFsid_nono,
)
from my_utils import load_audio
from vc_infer_pipeline import VC
# Directory two levels above this file. Config.device_config builds the
# "src/configs/*.json" and "src/trainset_preprocess_pipeline_print.py" paths
# relative to it.
BASE_DIR = Path(__file__).resolve().parent.parent
class Config:
    """Device and precision settings used for voice-conversion inference.

    After construction the instance exposes ``device``, ``is_half``,
    ``n_cpu``, ``gpu_name``, ``gpu_mem`` and the four window parameters
    ``x_pad``, ``x_query``, ``x_center`` and ``x_max`` computed by
    :meth:`device_config`.
    """

    def __init__(self, device, is_half):
        """Store the requested device/precision and derive the window sizes.

        Args:
            device: Device string such as ``"cpu"`` or ``"cuda:0"``.
            is_half: ``True`` to request half precision, ``False`` for
                single precision.
        """
        # Honor the caller's choices; device_config() may still override
        # them (CPU fallback, forced single precision on some GPUs).
        self.device = device
        self.is_half = is_half
        self.n_cpu = 0
        self.gpu_name = None
        self.gpu_mem = None
        self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()

    @staticmethod
    def _replace_in_file(path, old, new) -> None:
        """Rewrite the file at ``path`` with every ``old`` replaced by ``new``."""
        with open(path, "r") as f:
            patched = f.read().replace(old, new)
        with open(path, "w") as f:
            f.write(patched)

    def _cuda_index(self) -> int:
        """Return the CUDA device index named by ``self.device``.

        ``"cuda:1"`` yields ``1``. A device string without a numeric suffix
        (e.g. plain ``"cuda"``) yields the current CUDA device instead of
        raising ``ValueError``.
        """
        _, sep, tail = str(self.device).rpartition(":")
        if sep and tail.isdigit():
            return int(tail)
        return torch.cuda.current_device()

    def _configure_cuda(self) -> None:
        """Probe the selected GPU and apply the GPU-specific overrides.

        Side effects: may set ``is_half`` to ``False``, sets ``gpu_name`` and
        ``gpu_mem``, and may rewrite files under ``BASE_DIR / "src"``.
        """
        i_device = self._cuda_index()
        self.gpu_name = torch.cuda.get_device_name(i_device)
        name_upper = self.gpu_name.upper()
        preprocess_script = BASE_DIR / "src" / "trainset_preprocess_pipeline_print.py"

        force_single_precision = (
            ("16" in self.gpu_name and "V100" not in name_upper)
            or "P40" in name_upper
            or any(tag in self.gpu_name for tag in ("1060", "1070", "1080"))
        )
        if force_single_precision:
            print("16 series/10 series P40 forced single precision")
            self.is_half = False
            # Flip every "true" to "false" in the bundled JSON configs.
            for config_file in ("32k.json", "40k.json", "48k.json"):
                self._replace_in_file(
                    BASE_DIR / "src" / "configs" / config_file, "true", "false"
                )
            self._replace_in_file(preprocess_script, "3.7", "3.0")
        else:
            self.gpu_name = None

        # Total device memory in GiB; the +0.4 rounds values slightly below
        # a whole GiB up before truncation.
        total_bytes = torch.cuda.get_device_properties(i_device).total_memory
        self.gpu_mem = int(total_bytes / 1024 / 1024 / 1024 + 0.4)
        if self.gpu_mem <= 4:
            self._replace_in_file(preprocess_script, "3.7", "3.0")

    def device_config(self) -> tuple:
        """Resolve the effective device and return the window parameters.

        A CUDA device is only probed when one was requested *and* CUDA is
        available. If CUDA was requested but is unavailable, ``self.device``
        falls back to ``"cpu"``. Any other device string is left untouched.

        Returns:
            ``(x_pad, x_query, x_center, x_max)`` chosen from ``is_half`` and,
            when known, the GPU memory size.
        """
        wants_cuda = str(self.device).lower().startswith("cuda")
        if wants_cuda and torch.cuda.is_available():
            self._configure_cuda()
        elif wants_cuda:
            # Requested GPU is not usable on this machine.
            self.device = "cpu"

        if self.n_cpu == 0:
            self.n_cpu = cpu_count()

        if self.is_half:
            # 6G memory config
            x_pad, x_query, x_center, x_max = 3, 10, 60, 65
        else:
            # 5G memory config
            x_pad, x_query, x_center, x_max = 1, 6, 38, 41

        # Small GPUs (<= 4 GiB) get the most conservative windows.
        if self.gpu_mem is not None and self.gpu_mem <= 4:
            x_pad, x_query, x_center, x_max = 1, 5, 30, 32

        return x_pad, x_query, x_center, x_max
def load_hubert(device, is_half, model_path):
    """Load the HuBERT checkpoint at ``model_path`` ready for inference.

    Args:
        device: Device the model is moved to.
        is_half: Cast the model to half precision when true, otherwise to
            single precision.
        model_path: Path of the checkpoint handed to fairseq.

    Returns:
        The first model of the loaded ensemble, on ``device``, in the
        requested precision and in eval mode.
    """
    # Only the model list is needed; the saved config and task are discarded.
    ensemble, _saved_cfg, _task = checkpoint_utils.load_model_ensemble_and_task(
        [model_path],
        suffix='',
    )
    model = ensemble[0].to(device)
    model = model.half() if is_half else model.float()
    model.eval()
    return model
def get_vc(device, is_half, config, model_path):
    """Load a voice-model checkpoint and build the objects needed to infer.

    Args:
        device: Device the synthesizer network is moved to.
        is_half: Use half precision for the network when true, otherwise
            single precision.
        config: Configuration object passed through to ``VC``.
        model_path: Path of the voice-model checkpoint.

    Returns:
        Tuple ``(cpt, version, net_g, tgt_sr, vc)``: the loaded checkpoint
        dict, its version string, the synthesizer network, the last entry of
        the checkpoint's ``config`` list, and the ``VC`` pipeline object.

    Raises:
        ValueError: If the checkpoint lacks the ``"config"`` or ``"weight"``
            keys, or if its ``"version"`` is neither ``"v1"`` nor ``"v2"``.
    """
    # NOTE(security): torch.load unpickles the file; only load checkpoints
    # that come from a trusted source.
    cpt = torch.load(model_path, map_location='cpu')
    if "config" not in cpt or "weight" not in cpt:
        raise ValueError(f'Incorrect format for {model_path}. Use a voice model trained using RVC v2 instead.')

    tgt_sr = cpt["config"][-1]
    # Overwrite the third-from-last config entry with the row count of the
    # embedding table (presumably the number of speakers - TODO confirm).
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
    if_f0 = cpt.get("f0", 1)
    version = cpt.get("version", "v1")

    # Reject unknown versions explicitly; otherwise net_g would be unbound
    # and the function would die later with an UnboundLocalError.
    if version not in ("v1", "v2"):
        raise ValueError(
            f"Unsupported model version {version!r} in {model_path}; expected 'v1' or 'v2'."
        )

    if version == "v1":
        if if_f0 == 1:
            net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half)
        else:
            net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
    else:  # version == "v2"
        if if_f0 == 1:
            net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=is_half)
        else:
            net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])

    # enc_q is dropped before loading weights; strict=False tolerates the
    # resulting key mismatch and the load report is printed.
    del net_g.enc_q
    print(net_g.load_state_dict(cpt["weight"], strict=False))
    net_g.eval().to(device)

    if is_half:
        net_g = net_g.half()
    else:
        net_g = net_g.float()

    vc = VC(tgt_sr, config)
    return cpt, version, net_g, tgt_sr, vc
def rvc_infer(
    index_path,
    index_rate,
    input_path,
    output_path,
    pitch_change,
    f0_method,
    cpt,
    version,
    net_g,
    filter_radius,
    tgt_sr,
    rms_mix_rate,
    protect,
    crepe_hop_length,
    vc,
    hubert_model
):
    """Convert the audio at ``input_path`` and write the result as a WAV file.

    The input is read with ``load_audio(input_path, 16000)``, run through
    ``vc.pipeline`` and the returned audio is written to ``output_path`` with
    ``tgt_sr`` as the rate given to ``scipy.io.wavfile.write``.

    Args:
        index_path, index_rate, pitch_change, f0_method, filter_radius,
        rms_mix_rate, protect, crepe_hop_length: Forwarded unchanged to
            ``vc.pipeline``.
        input_path: Audio file to convert; also forwarded to the pipeline.
        output_path: Destination of the converted WAV file.
        cpt: Checkpoint dict; only its ``"f0"`` entry (default ``1``) is read.
        version: Model version string forwarded to the pipeline.
        net_g: Synthesizer network forwarded to the pipeline.
        tgt_sr: Rate forwarded to the pipeline and used when writing.
        vc: Object providing ``pipeline``.
        hubert_model: Feature model forwarded to the pipeline.

    Returns:
        None. The only result is the file written to ``output_path``.
    """
    source_audio = load_audio(input_path, 16000)
    timings = [0, 0, 0]
    uses_f0 = cpt.get('f0', 1)

    # All arguments are positional; their order must match vc.pipeline.
    # NOTE(review): the two literal 0s are presumably a speaker id and a
    # "no resampling" rate - confirm against VC.pipeline.
    converted = vc.pipeline(
        hubert_model, net_g, 0,
        source_audio, input_path, timings,
        pitch_change, f0_method,
        index_path, index_rate,
        uses_f0, filter_radius,
        tgt_sr, 0,
        rms_mix_rate, version, protect,
        crepe_hop_length
    )

    wavfile.write(output_path, tgt_sr, converted)
|