Hev832 committed on
Commit
525511e
1 Parent(s): 081ad1f

Delete lib/infer_libs/rvc.py

Browse files
Files changed (1) hide show
  1. lib/infer_libs/rvc.py +0 -147
lib/infer_libs/rvc.py DELETED
@@ -1,147 +0,0 @@
1
- from multiprocessing import cpu_count
2
- from pathlib import Path
3
-
4
- import torch
5
- from fairseq import checkpoint_utils
6
- from scipy.io import wavfile
7
-
8
- from infer_pack.models import (
9
- SynthesizerTrnMs256NSFsid,
10
- SynthesizerTrnMs256NSFsid_nono,
11
- SynthesizerTrnMs768NSFsid,
12
- SynthesizerTrnMs768NSFsid_nono,
13
- )
14
- from my_utils import load_audio
15
- from infer import VC
16
-
17
- BASE_DIR = Path(__file__).resolve().parent
18
-
19
class Config:
    """Inference settings: target device, precision and the four ``x_*`` values.

    Creating an instance probes the hardware through ``torch`` and may
    override the requested precision and, when CUDA is unavailable, the
    device string.
    """

    def __init__(self, device, is_half):
        """Record the requested settings, then derive the rest from hardware.

        Args:
            device: Device string such as ``"cuda:0"``. The text after the
                last ``":"`` is read as the CUDA device index.
            is_half: Whether half precision is requested. ``device_config``
                may switch it off.
        """
        self.device = device
        self.is_half = is_half
        self.n_cpu = 0  # 0 means "unset"; device_config() fills it in
        self.gpu_name = None
        self.gpu_mem = None  # whole GiB of GPU memory, set on the CUDA path
        self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()

    def device_config(self) -> tuple:
        """Probe the hardware, adjust device/precision and choose ``x_*``.

        Side effects: may update ``device``, ``is_half``, ``gpu_name``,
        ``gpu_mem`` and ``n_cpu`` on the instance.

        Returns:
            The tuple ``(x_pad, x_query, x_center, x_max)``.
        """
        if torch.cuda.is_available():
            # "cuda:1" -> 1. A device string without a numeric suffix
            # (e.g. plain "cuda") used to raise ValueError here; fall back
            # to the first CUDA device instead.
            try:
                i_device = int(str(self.device).split(":")[-1])
            except ValueError:
                i_device = 0

            self.gpu_name = torch.cuda.get_device_name(i_device)
            name_upper = self.gpu_name.upper()
            if (
                ("16" in self.gpu_name and "V100" not in name_upper)
                or "P40" in name_upper
                or "1060" in self.gpu_name
                or "1070" in self.gpu_name
                or "1080" in self.gpu_name
            ):
                print("16 series/10 series P40 forced single precision")
                self.is_half = False
            else:
                self.gpu_name = None

            # Bytes -> GiB; the +0.4 nudges values just under a whole GiB up
            # before int() truncates.
            total_bytes = torch.cuda.get_device_properties(i_device).total_memory
            self.gpu_mem = int(total_bytes / 1024 / 1024 / 1024 + 0.4)
        elif torch.backends.mps.is_available():
            print("No supported N-card found, use MPS for inference")
            self.device = "mps"
        else:
            print("No supported N-card found, use CPU for inference")
            self.device = "cpu"
            # Half precision is not reliably implemented for CPU kernels, so
            # the CPU fallback must run in single precision.
            self.is_half = False

        if self.n_cpu == 0:
            self.n_cpu = cpu_count()

        if self.is_half:
            # 6G memory config
            x_pad = 3
            x_query = 10
            x_center = 60
            x_max = 65
        else:
            # 5G memory config
            x_pad = 1
            x_query = 6
            x_center = 38
            x_max = 41

        # Small GPUs (<= 4 GiB) get the most conservative values regardless
        # of precision.
        if self.gpu_mem is not None and self.gpu_mem <= 4:
            x_pad = 1
            x_query = 5
            x_center = 30
            x_max = 32

        return x_pad, x_query, x_center, x_max
81
-
82
def load_hubert(device, is_half, model_path):
    """Load the first model of a fairseq checkpoint, ready for inference.

    Args:
        device: Device the model is moved to.
        is_half: Cast the model to half precision when true, to float
            otherwise.
        model_path: Path of the checkpoint handed to fairseq.

    Returns:
        The loaded model, on ``device``, in the requested precision and in
        eval mode.
    """
    # Only the model list is needed; the saved config and task are unused.
    ensemble, _saved_cfg, _task = checkpoint_utils.load_model_ensemble_and_task(
        [model_path], suffix=''
    )
    model = ensemble[0].to(device)
    model = model.half() if is_half else model.float()
    model.eval()
    return model
94
-
95
def get_vc(device, is_half, config, model_path):
    """Load a voice-model checkpoint and build its synthesizer and pipeline.

    Args:
        device: Device the synthesizer is moved to.
        is_half: Cast the synthesizer to half precision when true, to float
            otherwise.
        config: Passed unchanged to ``VC``.
        model_path: Path of the checkpoint file.

    Returns:
        ``(cpt, version, net_g, tgt_sr, vc)``: the loaded checkpoint dict,
        its version string, the synthesizer in eval mode, the last entry of
        the checkpoint's ``"config"`` list, and the ``VC`` instance.

    Raises:
        ValueError: If the checkpoint lacks ``"config"`` or ``"weight"``, or
            declares a version other than ``"v1"``/``"v2"``.
    """
    # NOTE(security): depending on the torch version and its weights_only
    # default, torch.load may unpickle arbitrary objects. Only load
    # checkpoints that come from a trusted source.
    cpt = torch.load(model_path, map_location='cpu')
    if "config" not in cpt or "weight" not in cpt:
        raise ValueError(f'Incorrect format for {model_path}. Use a voice model trained using RVC v2 instead.')

    tgt_sr = cpt["config"][-1]
    # Overwrite the third-from-last config entry with the row count of the
    # stored emb_g.weight so the rebuilt network matches the saved weights.
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
    if_f0 = cpt.get("f0", 1)
    version = cpt.get("version", "v1")

    # The if/elif chain keeps class lookup lazy and lets an unknown version
    # fail with a clear error instead of an unbound ``net_g`` further down.
    if version == "v1":
        if if_f0 == 1:
            net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half)
        else:
            net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
    elif version == "v2":
        if if_f0 == 1:
            net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=is_half)
        else:
            net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
    else:
        raise ValueError(
            f'Unsupported model version {version!r} in {model_path}. Expected "v1" or "v2".'
        )

    # enc_q is dropped before loading; strict=False tolerates the resulting
    # key mismatch, and the load report is printed.
    del net_g.enc_q
    print(net_g.load_state_dict(cpt["weight"], strict=False))
    net_g.eval().to(device)

    if is_half:
        net_g = net_g.half()
    else:
        net_g = net_g.float()

    vc = VC(tgt_sr, config)
    return cpt, version, net_g, tgt_sr, vc
127
-
128
def rvc_infer(index_path, index_rate, input_path, output_path, pitch_change, f0_method, cpt, version, net_g, filter_radius, tgt_sr, rms_mix_rate, protect, crepe_hop_length, vc, hubert_model):
    """Run voice conversion on one audio file and write the result as WAV.

    The input is loaded through ``load_audio`` at 16000 Hz, passed to
    ``vc.pipeline`` together with the model objects and tuning arguments,
    and the returned audio is written to ``output_path`` at ``tgt_sr``.

    Returns:
        None. The only result is the file written to ``output_path``.
    """
    source_audio = load_audio(input_path, 16000)

    # Three-slot list handed to the pipeline (presumably per-stage timings,
    # confirm against VC.pipeline); it is not read back here.
    stage_times = [0, 0, 0]

    # Checkpoints without an "f0" entry are treated as pitch-guided (1).
    uses_f0 = cpt.get('f0', 1)

    # Positional order must match VC.pipeline. The two literal 0 arguments
    # are fixed by this function (presumably speaker id and resample rate,
    # verify against the pipeline signature).
    pipeline_args = (
        hubert_model,
        net_g,
        0,
        source_audio,
        input_path,
        stage_times,
        pitch_change,
        f0_method,
        index_path,
        index_rate,
        uses_f0,
        filter_radius,
        tgt_sr,
        0,
        rms_mix_rate,
        version,
        protect,
        crepe_hop_length,
    )
    converted = vc.pipeline(*pipeline_args)

    wavfile.write(output_path, tgt_sr, converted)