Altadmin committed
Commit 890b83f
1 Parent(s): f4dd36d

Upload 38 files
Dockerfile ADDED
@@ -0,0 +1,13 @@
+ # syntax=docker/dockerfile:1
+
+ FROM python:3.10-bullseye
+
+ EXPOSE 7865
+
+ WORKDIR /app
+
+ COPY . .
+
+ RUN pip3 install -r requirements.txt
+
+ CMD ["python3", "infer-web.py"]
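For reference, serving this image typically amounts to `docker build -t rvc .` (the `rvc` tag is an arbitrary example) followed by `docker run -p 7865:7865 rvc`, which publishes the Gradio port exposed above. Note that `requirements.txt` must be present in the build context for the `RUN pip3 install` step to succeed.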
MDXNet.py ADDED
@@ -0,0 +1,274 @@
+ import soundfile as sf
+ import torch, pdb, time, argparse, os, warnings, sys, librosa
+ import numpy as np
+ import onnxruntime as ort
+ from scipy.io.wavfile import write
+ from tqdm import tqdm
+ import torch.nn as nn
+
+ dim_c = 4
+
+
+ class Conv_TDF_net_trim:
+     def __init__(
+         self, device, model_name, target_name, L, dim_f, dim_t, n_fft, hop=1024
+     ):
+         super(Conv_TDF_net_trim, self).__init__()
+
+         self.dim_f = dim_f
+         self.dim_t = 2**dim_t
+         self.n_fft = n_fft
+         self.hop = hop
+         self.n_bins = self.n_fft // 2 + 1
+         self.chunk_size = hop * (self.dim_t - 1)
+         self.window = torch.hann_window(window_length=self.n_fft, periodic=True).to(
+             device
+         )
+         self.target_name = target_name
+         self.blender = "blender" in model_name
+
+         out_c = dim_c * 4 if target_name == "*" else dim_c
+         self.freq_pad = torch.zeros(
+             [1, out_c, self.n_bins - self.dim_f, self.dim_t]
+         ).to(device)
+
+         self.n = L // 2
+
+     def stft(self, x):
+         x = x.reshape([-1, self.chunk_size])
+         x = torch.stft(
+             x,
+             n_fft=self.n_fft,
+             hop_length=self.hop,
+             window=self.window,
+             center=True,
+             return_complex=True,
+         )
+         x = torch.view_as_real(x)
+         x = x.permute([0, 3, 1, 2])
+         x = x.reshape([-1, 2, 2, self.n_bins, self.dim_t]).reshape(
+             [-1, dim_c, self.n_bins, self.dim_t]
+         )
+         return x[:, :, : self.dim_f]
+
+     def istft(self, x, freq_pad=None):
+         freq_pad = (
+             self.freq_pad.repeat([x.shape[0], 1, 1, 1])
+             if freq_pad is None
+             else freq_pad
+         )
+         x = torch.cat([x, freq_pad], -2)
+         c = 4 * 2 if self.target_name == "*" else 2
+         x = x.reshape([-1, c, 2, self.n_bins, self.dim_t]).reshape(
+             [-1, 2, self.n_bins, self.dim_t]
+         )
+         x = x.permute([0, 2, 3, 1])
+         x = x.contiguous()
+         x = torch.view_as_complex(x)
+         x = torch.istft(
+             x, n_fft=self.n_fft, hop_length=self.hop, window=self.window, center=True
+         )
+         return x.reshape([-1, c, self.chunk_size])
+
+
+ def get_models(device, dim_f, dim_t, n_fft):
+     return Conv_TDF_net_trim(
+         device=device,
+         model_name="Conv-TDF",
+         target_name="vocals",
+         L=11,
+         dim_f=dim_f,
+         dim_t=dim_t,
+         n_fft=n_fft,
+     )
+
+
+ warnings.filterwarnings("ignore")
+ cpu = torch.device("cpu")
+ if torch.cuda.is_available():
+     device = torch.device("cuda:0")
+ elif torch.backends.mps.is_available():
+     device = torch.device("mps")
+ else:
+     device = torch.device("cpu")
+
+
+ class Predictor:
+     def __init__(self, args):
+         self.args = args
+         self.model_ = get_models(
+             device=cpu, dim_f=args.dim_f, dim_t=args.dim_t, n_fft=args.n_fft
+         )
+         self.model = ort.InferenceSession(
+             os.path.join(args.onnx, self.model_.target_name + ".onnx"),
+             providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
+         )
+         print("onnx load done")
+
+     def demix(self, mix):
+         samples = mix.shape[-1]
+         margin = self.args.margin
+         chunk_size = self.args.chunks * 44100
+         assert not margin == 0, "margin cannot be zero!"
+         if margin > chunk_size:
+             margin = chunk_size
+
+         segmented_mix = {}
+
+         if self.args.chunks == 0 or samples < chunk_size:
+             chunk_size = samples
+
+         counter = -1
+         for skip in range(0, samples, chunk_size):
+             counter += 1
+
+             s_margin = 0 if counter == 0 else margin
+             end = min(skip + chunk_size + margin, samples)
+
+             start = skip - s_margin
+
+             segmented_mix[skip] = mix[:, start:end].copy()
+             if end == samples:
+                 break
+
+         sources = self.demix_base(segmented_mix, margin_size=margin)
+         """
+         mix: (2, big_sample)
+         segmented_mix: offset -> (2, small_sample)
+         sources: (1, 2, big_sample)
+         """
+         return sources
+
+     def demix_base(self, mixes, margin_size):
+         chunked_sources = []
+         progress_bar = tqdm(total=len(mixes))
+         progress_bar.set_description("Processing")
+         for mix in mixes:
+             cmix = mixes[mix]
+             sources = []
+             n_sample = cmix.shape[1]
+             model = self.model_
+             trim = model.n_fft // 2
+             gen_size = model.chunk_size - 2 * trim
+             pad = gen_size - n_sample % gen_size
+             mix_p = np.concatenate(
+                 (np.zeros((2, trim)), cmix, np.zeros((2, pad)), np.zeros((2, trim))), 1
+             )
+             mix_waves = []
+             i = 0
+             while i < n_sample + pad:
+                 waves = np.array(mix_p[:, i : i + model.chunk_size])
+                 mix_waves.append(waves)
+                 i += gen_size
+             mix_waves = torch.tensor(mix_waves, dtype=torch.float32).to(cpu)
+             with torch.no_grad():
+                 _ort = self.model
+                 spek = model.stft(mix_waves)
+                 if self.args.denoise:
+                     spec_pred = (
+                         -_ort.run(None, {"input": -spek.cpu().numpy()})[0] * 0.5
+                         + _ort.run(None, {"input": spek.cpu().numpy()})[0] * 0.5
+                     )
+                     tar_waves = model.istft(torch.tensor(spec_pred))
+                 else:
+                     tar_waves = model.istft(
+                         torch.tensor(_ort.run(None, {"input": spek.cpu().numpy()})[0])
+                     )
+                 tar_signal = (
+                     tar_waves[:, :, trim:-trim]
+                     .transpose(0, 1)
+                     .reshape(2, -1)
+                     .numpy()[:, :-pad]
+                 )
+
+                 start = 0 if mix == 0 else margin_size
+                 end = None if mix == list(mixes.keys())[::-1][0] else -margin_size
+                 if margin_size == 0:
+                     end = None
+                 sources.append(tar_signal[:, start:end])
+
+                 progress_bar.update(1)
+
+             chunked_sources.append(sources)
+         _sources = np.concatenate(chunked_sources, axis=-1)
+         # del self.model
+         progress_bar.close()
+         return _sources
+
+     def prediction(self, m, vocal_root, others_root, format):
+         os.makedirs(vocal_root, exist_ok=True)
+         os.makedirs(others_root, exist_ok=True)
+         basename = os.path.basename(m)
+         mix, rate = librosa.load(m, mono=False, sr=44100)
+         if mix.ndim == 1:
+             mix = np.asfortranarray([mix, mix])
+         mix = mix.T
+         sources = self.demix(mix.T)
+         opt = sources[0].T
+         if format in ["wav", "flac"]:
+             sf.write(
+                 "%s/%s_main_vocal.%s" % (vocal_root, basename, format), mix - opt, rate
+             )
+             sf.write("%s/%s_others.%s" % (others_root, basename, format), opt, rate)
+         else:
+             path_vocal = "%s/%s_main_vocal.wav" % (vocal_root, basename)
+             path_other = "%s/%s_others.wav" % (others_root, basename)
+             sf.write(path_vocal, mix - opt, rate)
+             sf.write(path_other, opt, rate)
+             if os.path.exists(path_vocal):
+                 os.system(
+                     "ffmpeg -i %s -vn %s -q:a 2 -y"
+                     % (path_vocal, path_vocal[:-4] + ".%s" % format)
+                 )
+             if os.path.exists(path_other):
+                 os.system(
+                     "ffmpeg -i %s -vn %s -q:a 2 -y"
+                     % (path_other, path_other[:-4] + ".%s" % format)
+                 )
+
+
+ class MDXNetDereverb:
+     def __init__(self, chunks):
+         self.onnx = "uvr5_weights/onnx_dereverb_By_FoxJoy"
+         self.shifts = 10  # 'Predict with randomised equivariant stabilisation'
+         self.mixing = "min_mag"  # ['default','min_mag','max_mag']
+         self.chunks = chunks
+         self.margin = 44100
+         self.dim_t = 9
+         self.dim_f = 3072
+         self.n_fft = 6144
+         self.denoise = True
+         self.pred = Predictor(self)
+
+     def _path_audio_(self, input, vocal_root, others_root, format):
+         self.pred.prediction(input, vocal_root, others_root, format)
+
+
+ if __name__ == "__main__":
+     dereverb = MDXNetDereverb(15)
+     from time import time as ttime
+
+     t0 = ttime()
+     dereverb._path_audio_(
+         "雪雪伴奏对消HP5.wav",
+         "vocal",
+         "others",
+         "wav",  # output format (the original call omitted this required argument)
+     )
+     t1 = ttime()
+     print(t1 - t0)
+
+
+ """
+ runtime\python.exe MDXNet.py
+
+ 6G card (VRAM before -> after):
+ 15/9: 0.8G -> 6.8G
+ 14:   0.8G -> 6.5G
+ 25:   out of memory
+
+ half15:  0.7G -> 6.6G, 22.69s
+ fp32-15: 0.7G -> 6.6G, 20.85s
+ """
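A minimal usage sketch of the dereverb wrapper above, mirroring the file's own `__main__` demo. The input filename and output directories are illustrative, and the FoxJoy ONNX weights must already be under uvr5_weights/onnx_dereverb_By_FoxJoy:

    from MDXNet import MDXNetDereverb

    dereverb = MDXNetDereverb(15)  # chunks=15 fits a ~6 GB GPU per the notes above
    # writes <vocal>/input.wav_main_vocal.wav and <others>/input.wav_others.wav
    dereverb._path_audio_("input.wav", "vocal", "others", "wav")
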
RVC.py ADDED
@@ -0,0 +1,1318 @@
+ import os
+ import shutil
+ import requests
+ import sys
+ import tempfile
+ import soundfile as sf
+
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
+ now_dir = os.getcwd()
+ sys.path.append(now_dir)
+ import traceback, pdb
+ import warnings
+
+ import numpy as np
+ import torch
+ from pydub import AudioSegment
+
+ os.environ["OPENBLAS_NUM_THREADS"] = "1"
+ os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"
+ import logging
+ import threading
+ from random import shuffle
+ from subprocess import Popen
+ from time import sleep
+
+ import faiss
+ import ffmpeg
+ import gradio as gr
+ from config import Config
+ from fairseq import checkpoint_utils
+ from i18n import I18nAuto
+ from infer_pack.models import (
+     SynthesizerTrnMs256NSFsid,
+     SynthesizerTrnMs256NSFsid_nono,
+     SynthesizerTrnMs768NSFsid,
+     SynthesizerTrnMs768NSFsid_nono,
+ )
+ from infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
+ from infer_uvr5 import _audio_pre_, _audio_pre_new
+ from MDXNet import MDXNetDereverb
+ from my_utils import load_audio
+ from train.process_ckpt import change_info, extract_small_model, merge, show_info
+ from vc_infer_pipeline import VC
+ from sklearn.cluster import MiniBatchKMeans
+
+ logging.getLogger("numba").setLevel(logging.WARNING)
+
+
+ tmp = os.path.join(now_dir, "TEMP")
+ shutil.rmtree(tmp, ignore_errors=True)
+ shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
+ shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True)
+ os.makedirs(tmp, exist_ok=True)
+ os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
+ os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True)
+ os.environ["TEMP"] = tmp
+ warnings.filterwarnings("ignore")
+ torch.manual_seed(114514)
+
+
+ config = Config()
+ i18n = I18nAuto()
+ i18n.print()
+ # check for NVIDIA GPUs usable for training and accelerated inference
+ ngpu = torch.cuda.device_count()
+ gpu_infos = []
+ mem = []
+ if_gpu_ok = False
+
+ if torch.cuda.is_available() or ngpu != 0:
+     for i in range(ngpu):
+         gpu_name = torch.cuda.get_device_name(i)
+         if any(
+             value in gpu_name.upper()
+             for value in [
+                 "10",
+                 "16",
+                 "20",
+                 "30",
+                 "40",
+                 "A2",
+                 "A3",
+                 "A4",
+                 "P4",
+                 "A50",
+                 "500",
+                 "A60",
+                 "70",
+                 "80",
+                 "90",
+                 "M4",
+                 "T4",
+                 "TITAN",
+             ]
+         ):
+             # A10#A100#V100#A40#P40#M40#K80#A4500
+             if_gpu_ok = True  # at least one usable NVIDIA GPU
+             gpu_infos.append("%s\t%s" % (i, gpu_name))
+             mem.append(
+                 int(
+                     torch.cuda.get_device_properties(i).total_memory
+                     / 1024
+                     / 1024
+                     / 1024
+                     + 0.4
+                 )
+             )
+ if if_gpu_ok and len(gpu_infos) > 0:
+     gpu_info = "\n".join(gpu_infos)
+     default_batch_size = 1
+ else:
+     gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练")  # "no usable GPU to support training"
+     default_batch_size = 1
+ gpus = "-".join([i[0] for i in gpu_infos])
+
+
+ class ToolButton(gr.Button, gr.components.FormComponent):
+     """Small button with single emoji as text, fits inside gradio forms"""
+
+     def __init__(self, **kwargs):
+         super().__init__(variant="tool", **kwargs)
+
+     def get_block_name(self):
+         return "button"
+
+
+ hubert_model = None
+
+
+ def load_hubert():
+     global hubert_model
+     models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
+         ["hubert_base.pt"],
+         suffix="",
+     )
+     hubert_model = models[0]
+     hubert_model = hubert_model.to(config.device)
+     if config.is_half:
+         hubert_model = hubert_model.half()
+     else:
+         hubert_model = hubert_model.float()
+     hubert_model.eval()
+
+
+ weight_root = "weights"
+ weight_uvr5_root = "uvr5_weights"
+ index_root = "logs"
+ names = []
+ for name in os.listdir(weight_root):
+     if name.endswith(".pth"):
+         names.append(name)
+ index_paths = []
+ for root, dirs, files in os.walk(index_root, topdown=False):
+     for name in files:
+         if name.endswith(".index") and "trained" not in name:
+             index_paths.append("%s/%s" % (root, name))
+ uvr5_names = []
+ for name in os.listdir(weight_uvr5_root):
+     if name.endswith(".pth") or "onnx" in name:
+         uvr5_names.append(name.replace(".pth", ""))
+
+
+ def vc_single(
+     sid,
+     input_audio_path,
+     f0_up_key,
+     f0_file,
+     f0_method,
+     file_index,
+     file_index2,
+     # file_big_npy,
+     index_rate,
+     filter_radius,
+     resample_sr,
+     rms_mix_rate,
+     protect,
+     song,
+ ):  # spk_item, input_audio0, vc_transform0, f0_file, f0method0
+     global tgt_sr, net_g, vc, hubert_model, version
+     logging.info(f0_up_key)
+
+     if input_audio_path is None:
+         return "You need to upload an audio", None
+
+     f0_up_key = int(f0_up_key)
+     try:
+         if not input_audio_path.lower().endswith(".wav"):
+             # Download the audio file
+             response = requests.get(input_audio_path, stream=True)
+             response.raise_for_status()
+             logging.info("after response")
+
+             # Create a temporary file and save the downloaded audio content
+             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+                 path = temp_file.name
+                 logging.info("Temporary file path: %s", path)
+                 temp_file.write(response.content)
+
+             # Convert the audio file to WAV format using pydub
+             audio = AudioSegment.from_file(path)
+             output_file_path = os.path.splitext(path)[0] + ".wav"
+             audio.export(output_file_path, format="wav")
+
+             # Use the converted WAV file as the new temporary file
+             path = output_file_path
+
+         else:
+             # The input audio URL is already pointing to a WAV file, so use it as-is
+             response = requests.get(input_audio_path)
+             response.raise_for_status()
+
+             # Create a temporary file and save the downloaded audio content
+             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+                 path = temp_file.name
+                 logging.info("Temporary file path: %s", path)
+                 temp_file.write(response.content)
+
+         if song == True:
+             uvr(path)
+             path = f"{os.getcwd()}/opt/vocal.wav"
+
+         audio = load_audio(path, 16000)
+         temp_file.close()
+         audio_max = np.abs(audio).max() / 0.95
+         if audio_max > 1:
+             audio = audio.astype(float)
+             audio /= audio_max
+         times = [0, 0, 0]
+         if not hubert_model:
+             load_hubert()
+         if_f0 = cpt.get("f0", 1)
+         file_index = (
+             (
+                 file_index.strip(" ")
+                 .strip('"')
+                 .strip("\n")
+                 .strip('"')
+                 .strip(" ")
+                 .replace("trained", "added")
+             )
+             if file_index != ""
+             else file_index2
+         )  # auto-replace "trained" with "added" so a mistyped index path still works
+         logging.info("before vc pipeline")
+         logging.info(vc)
+         audio_opt = vc.pipeline(
+             hubert_model,
+             net_g,
+             sid,
+             audio,
+             input_audio_path,
+             times,
+             f0_up_key,
+             f0_method,
+             file_index,
+             # file_big_npy,
+             index_rate,
+             if_f0,
+             filter_radius,
+             tgt_sr,
+             resample_sr,
+             rms_mix_rate,
+             version,
+             protect,
+             f0_file=f0_file,
+         )
+         logging.info("after vc pipeline")
+
+         logging.info(f0_up_key)
+
+         # chained comparison: tgt_sr != resample_sr and resample_sr >= 16000
+         if tgt_sr != resample_sr >= 16000:
+             tgt_sr = resample_sr
+         index_info = (
+             "Using index:%s." % file_index
+             if os.path.exists(file_index)
+             else "Index not used."
+         )
+         return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
+             index_info,
+             times[0],
+             times[1],
+             times[2],
+         ), (tgt_sr, audio_opt)
+     except:
+         info = traceback.format_exc()
+         logging.info(info)
+         return info, (None, None)
+
+
+ def vc_multi(
+     sid,
+     dir_path,
+     opt_root,
+     paths,
+     f0_up_key,
+     f0_method,
+     file_index,
+     file_index2,
+     # file_big_npy,
+     index_rate,
+     filter_radius,
+     resample_sr,
+     rms_mix_rate,
+     protect,
+     format1,
+ ):
+     try:
+         dir_path = (
+             dir_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
+         )  # strip stray spaces, quotes and newlines from a pasted path
+         opt_root = opt_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
+         os.makedirs(opt_root, exist_ok=True)
+         try:
+             if dir_path != "":
+                 paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)]
+             else:
+                 paths = [path.name for path in paths]
+         except:
+             traceback.print_exc()
+             paths = [path.name for path in paths]
+         infos = []
+         for path in paths:
+             info, opt = vc_single(
+                 sid,
+                 path,
+                 f0_up_key,
+                 None,
+                 f0_method,
+                 file_index,
+                 file_index2,
+                 # file_big_npy,
+                 index_rate,
+                 filter_radius,
+                 resample_sr,
+                 rms_mix_rate,
+                 protect,
+                 False,  # song; vc_single requires this argument (the original call omitted it)
+             )
+             if "Success" in info:
+                 try:
+                     tgt_sr, audio_opt = opt
+                     if format1 in ["wav", "flac"]:
+                         sf.write(
+                             "%s/%s.%s" % (opt_root, os.path.basename(path), format1),
+                             audio_opt,
+                             tgt_sr,
+                         )
+                     else:
+                         path = "%s/%s.wav" % (opt_root, os.path.basename(path))
+                         sf.write(
+                             path,
+                             audio_opt,
+                             tgt_sr,
+                         )
+                         if os.path.exists(path):
+                             os.system(
+                                 "ffmpeg -i %s -vn %s -q:a 2 -y"
+                                 % (path, path[:-4] + ".%s" % format1)
+                             )
+                 except:
+                     info += traceback.format_exc()
+             infos.append("%s->%s" % (os.path.basename(path), info))
+             yield "\n".join(infos)
+         yield "\n".join(infos)
+     except:
+         yield traceback.format_exc()
+
+
+ def uvr(inp_root, model_name=uvr5_names[0], save_root_vocal="opt", paths=None, save_root_ins="opt", agg=10, format0="wav"):
+     try:
+         func = _audio_pre_
+         pre_fun = func(
+             agg=int(agg),
+             model_path=os.path.join(weight_uvr5_root, model_name + ".pth"),
+             device=config.device,
+             is_half=config.is_half,
+         )
+         pre_fun._path_audio_(inp_root, save_root_ins, save_root_vocal, format0)
+     finally:
+         try:
+             del pre_fun.model
+             del pre_fun
+         except:
+             traceback.print_exc()
+         logging.info("clean_empty_cache")
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+
+
+ # only one voice model can be active per tab
+ def get_vc(sid, to_return_protect0, to_return_protect1):
+     global n_spk, tgt_sr, net_g, vc, cpt, version
+     if sid == "" or sid == []:
+         global hubert_model
+         if hubert_model is not None:
+             # with polling, check whether sid switched from a loaded model to none
+             logging.info("clean_empty_cache")
+             del net_g, n_spk, vc, hubert_model, tgt_sr  # ,cpt
+             hubert_model = net_g = n_spk = vc = tgt_sr = None
+             if torch.cuda.is_available():
+                 torch.cuda.empty_cache()
+             ### without the re-instantiation below, the memory is not fully released
+             if_f0 = cpt.get("f0", 1)
+             version = cpt.get("version", "v1")
+             if version == "v1":
+                 if if_f0 == 1:
+                     net_g = SynthesizerTrnMs256NSFsid(
+                         *cpt["config"], is_half=config.is_half
+                     )
+                 else:
+                     net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
+             elif version == "v2":
+                 if if_f0 == 1:
+                     net_g = SynthesizerTrnMs768NSFsid(
+                         *cpt["config"], is_half=config.is_half
+                     )
+                 else:
+                     net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
+             del net_g, cpt
+             if torch.cuda.is_available():
+                 torch.cuda.empty_cache()
+             cpt = None
+         return {"visible": False, "__type__": "update"}
+     person = "%s/%s" % (weight_root, sid)
+     logging.info("loading %s" % person)
+     cpt = torch.load(person, map_location="cpu")
+     tgt_sr = cpt["config"][-1]
+     cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
+     if_f0 = cpt.get("f0", 1)
+     if if_f0 == 0:
+         to_return_protect0 = to_return_protect1 = {
+             "visible": False,
+             "value": 0.5,
+             "__type__": "update",
+         }
+     else:
+         to_return_protect0 = {
+             "visible": True,
+             "value": to_return_protect0,
+             "__type__": "update",
+         }
+         to_return_protect1 = {
+             "visible": True,
+             "value": to_return_protect1,
+             "__type__": "update",
+         }
+     version = cpt.get("version", "v1")
+     if version == "v1":
+         if if_f0 == 1:
+             net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
+         else:
+             net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
+     elif version == "v2":
+         if if_f0 == 1:
+             net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
+         else:
+             net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
+     del net_g.enc_q
+     logging.info(net_g.load_state_dict(cpt["weight"], strict=False))
+     net_g.eval().to(config.device)
+     if config.is_half:
+         net_g = net_g.half()
+     else:
+         net_g = net_g.float()
+     vc = VC(tgt_sr, config)
+     n_spk = cpt["config"][-3]
+     return (
+         {"visible": True, "maximum": n_spk, "__type__": "update"},
+         to_return_protect0,
+         to_return_protect1,
+     )
+
+
+ def change_choices():
+     names = []
+     for name in os.listdir(weight_root):
+         if name.endswith(".pth"):
+             names.append(name)
+     index_paths = []
+     for root, dirs, files in os.walk(index_root, topdown=False):
+         for name in files:
+             if name.endswith(".index") and "trained" not in name:
+                 index_paths.append("%s/%s" % (root, name))
+     return {"choices": sorted(names), "__type__": "update"}, {
+         "choices": sorted(index_paths),
+         "__type__": "update",
+     }
+
+
+ def clean():
+     return {"value": "", "__type__": "update"}
+
+
+ sr_dict = {
+     "32k": 32000,
+     "40k": 40000,
+     "48k": 48000,
+ }
+
+
+ def if_done(done, p):
+     while 1:
+         if p.poll() is None:
+             sleep(0.5)
+         else:
+             break
+     done[0] = True
+
+
+ def if_done_multi(done, ps):
+     while 1:
+         # poll() == None means a process is still running;
+         # keep waiting while any process is unfinished
+         flag = 1
+         for p in ps:
+             if p.poll() is None:
+                 flag = 0
+                 sleep(0.5)
+                 break
+         if flag == 1:
+             break
+     done[0] = True
+
+
+ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
+     sr = sr_dict[sr]
+     os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
+     f = open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "w")
+     f.close()
+     cmd = (
+         config.python_cmd
+         + " trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s "
+         % (trainset_dir, sr, n_p, now_dir, exp_dir)
+         + str(config.noparallel)
+     )
+     logging.info(cmd)
+     p = Popen(cmd, shell=True)  # , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir
+     # gradio only reads Popen output once the process has finished, so poll the
+     # log file on a timer instead of reading the pipe directly
+     done = [False]
+     threading.Thread(
+         target=if_done,
+         args=(
+             done,
+             p,
+         ),
+     ).start()
+     while 1:
+         with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
+             yield (f.read())
+         sleep(1)
+         if done[0]:
+             break
+     with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
+         log = f.read()
+     logging.info(log)
+     yield log
+
+
+ # but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2])
+ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19):
+     gpus = gpus.split("-")
+     os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
+     f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w")
+     f.close()
+     if if_f0:
+         cmd = config.python_cmd + " extract_f0_print.py %s/logs/%s %s %s" % (
+             now_dir,
+             exp_dir,
+             n_p,
+             f0method,
+         )
+         logging.info(cmd)
+         p = Popen(cmd, shell=True, cwd=now_dir)  # , stdin=PIPE, stdout=PIPE,stderr=PIPE
+         # poll the log file on a timer (see preprocess_dataset for why)
+         done = [False]
+         threading.Thread(
+             target=if_done,
+             args=(
+                 done,
+                 p,
+             ),
+         ).start()
+         while 1:
+             with open(
+                 "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r"
+             ) as f:
+                 yield (f.read())
+             sleep(1)
+             if done[0]:
+                 break
+         with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
+             log = f.read()
+         logging.info(log)
+         yield log
+     #### spawn one process per part/GPU
+     """
+     n_part=int(sys.argv[1])
+     i_part=int(sys.argv[2])
+     i_gpu=sys.argv[3]
+     exp_dir=sys.argv[4]
+     os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
+     """
+     leng = len(gpus)
+     ps = []
+     for idx, n_g in enumerate(gpus):
+         cmd = (
+             config.python_cmd
+             + " extract_feature_print.py %s %s %s %s %s/logs/%s %s"
+             % (
+                 config.device,
+                 leng,
+                 idx,
+                 n_g,
+                 now_dir,
+                 exp_dir,
+                 version19,
+             )
+         )
+         logging.info(cmd)
+         p = Popen(
+             cmd, shell=True, cwd=now_dir
+         )  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
+         ps.append(p)
+     # poll the log file on a timer (see preprocess_dataset for why)
+     done = [False]
+     threading.Thread(
+         target=if_done_multi,
+         args=(
+             done,
+             ps,
+         ),
+     ).start()
+     while 1:
+         with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
+             yield (f.read())
+         sleep(1)
+         if done[0]:
+             break
+     with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
+         log = f.read()
+     logging.info(log)
+     yield log
+
+
+ def change_sr2(sr2, if_f0_3, version19):
+     path_str = "" if version19 == "v1" else "_v2"
+     f0_str = "f0" if if_f0_3 else ""
+     if_pretrained_generator_exist = os.access(
+         "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK
+     )
+     if_pretrained_discriminator_exist = os.access(
+         "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK
+     )
+     if not if_pretrained_generator_exist:
+         logging.info(
+             "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
+             + " not exist, will not use pretrained model"
+         )
+     if not if_pretrained_discriminator_exist:
+         logging.info(
+             "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
+             + " not exist, will not use pretrained model"
+         )
+     return (
+         "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
+         if if_pretrained_generator_exist
+         else "",
+         "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
+         if if_pretrained_discriminator_exist
+         else "",
+     )
+
+
+ def change_version19(sr2, if_f0_3, version19):
+     path_str = "" if version19 == "v1" else "_v2"
+     if sr2 == "32k" and version19 == "v1":
+         sr2 = "40k"
+     to_return_sr2 = (
+         {"choices": ["40k", "48k"], "__type__": "update", "value": sr2}
+         if version19 == "v1"
+         else {"choices": ["40k", "48k", "32k"], "__type__": "update", "value": sr2}
+     )
+     f0_str = "f0" if if_f0_3 else ""
+     if_pretrained_generator_exist = os.access(
+         "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK
+     )
+     if_pretrained_discriminator_exist = os.access(
+         "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK
+     )
+     if not if_pretrained_generator_exist:
+         logging.info(
+             "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
+             + " not exist, will not use pretrained model"
+         )
+     if not if_pretrained_discriminator_exist:
+         logging.info(
+             "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
+             + " not exist, will not use pretrained model"
+         )
+     return (
+         "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
+         if if_pretrained_generator_exist
+         else "",
+         "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
+         if if_pretrained_discriminator_exist
+         else "",
+         to_return_sr2,
+     )
+
+
+ def change_f0(if_f0_3, sr2, version19):  # f0method8, pretrained_G14, pretrained_D15
+     path_str = "" if version19 == "v1" else "_v2"
+     if_pretrained_generator_exist = os.access(
+         "pretrained%s/f0G%s.pth" % (path_str, sr2), os.F_OK
+     )
+     if_pretrained_discriminator_exist = os.access(
+         "pretrained%s/f0D%s.pth" % (path_str, sr2), os.F_OK
+     )
+     if not if_pretrained_generator_exist:
+         logging.info(
+             "pretrained%s/f0G%s.pth" % (path_str, sr2)
+             + " not exist, will not use pretrained model"
+         )
+     if not if_pretrained_discriminator_exist:
+         logging.info(
+             "pretrained%s/f0D%s.pth" % (path_str, sr2)
+             + " not exist, will not use pretrained model"
+         )
+     if if_f0_3:
+         return (
+             {"visible": True, "__type__": "update"},
+             "pretrained%s/f0G%s.pth" % (path_str, sr2)
+             if if_pretrained_generator_exist
+             else "",
+             "pretrained%s/f0D%s.pth" % (path_str, sr2)
+             if if_pretrained_discriminator_exist
+             else "",
+         )
+     return (
+         {"visible": False, "__type__": "update"},
+         ("pretrained%s/G%s.pth" % (path_str, sr2))
+         if if_pretrained_generator_exist
+         else "",
+         ("pretrained%s/D%s.pth" % (path_str, sr2))
+         if if_pretrained_discriminator_exist
+         else "",
+     )
+
+
+ # but3.click(click_train,[exp_dir1,sr2,if_f0_3,save_epoch10,total_epoch11,batch_size12,if_save_latest13,pretrained_G14,pretrained_D15,gpus16])
+ def click_train(
+     exp_dir1,
+     sr2,
+     if_f0_3,
+     spk_id5,
+     save_epoch10,
+     total_epoch11,
+     batch_size12,
+     if_save_latest13,
+     pretrained_G14,
+     pretrained_D15,
+     gpus16,
+     if_cache_gpu17,
+     if_save_every_weights18,
+     version19,
+ ):
+     # build the filelist
+     exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
+     os.makedirs(exp_dir, exist_ok=True)
+     gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir)
+     feature_dir = (
+         "%s/3_feature256" % (exp_dir)
+         if version19 == "v1"
+         else "%s/3_feature768" % (exp_dir)
+     )
+     if if_f0_3:
+         f0_dir = "%s/2a_f0" % (exp_dir)
+         f0nsf_dir = "%s/2b-f0nsf" % (exp_dir)
+         names = (
+             set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
+             & set([name.split(".")[0] for name in os.listdir(feature_dir)])
+             & set([name.split(".")[0] for name in os.listdir(f0_dir)])
+             & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
+         )
+     else:
+         names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
+             [name.split(".")[0] for name in os.listdir(feature_dir)]
+         )
+     opt = []
+     for name in names:
+         if if_f0_3:
+             opt.append(
+                 "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
+                 % (
+                     gt_wavs_dir.replace("\\", "\\\\"),
+                     name,
+                     feature_dir.replace("\\", "\\\\"),
+                     name,
+                     f0_dir.replace("\\", "\\\\"),
+                     name,
+                     f0nsf_dir.replace("\\", "\\\\"),
+                     name,
+                     spk_id5,
+                 )
+             )
+         else:
+             opt.append(
+                 "%s/%s.wav|%s/%s.npy|%s"
+                 % (
+                     gt_wavs_dir.replace("\\", "\\\\"),
+                     name,
+                     feature_dir.replace("\\", "\\\\"),
+                     name,
+                     spk_id5,
+                 )
+             )
+     fea_dim = 256 if version19 == "v1" else 768
+     if if_f0_3:
+         for _ in range(2):
+             opt.append(
+                 "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
+                 % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
+             )
+     else:
+         for _ in range(2):
+             opt.append(
+                 "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
+                 % (now_dir, sr2, now_dir, fea_dim, spk_id5)
+             )
+     shuffle(opt)
+     with open("%s/filelist.txt" % exp_dir, "w") as f:
+         f.write("\n".join(opt))
+     logging.info("write filelist done")
+     # no config file needs to be generated
+     # cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0"
+     logging.info("use gpus: %s", gpus16)
+     if pretrained_G14 == "":
+         logging.info("no pretrained Generator")
+     if pretrained_D15 == "":
+         logging.info("no pretrained Discriminator")
+     if gpus16:
+         cmd = (
+             config.python_cmd
+             + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
+             % (
+                 exp_dir1,
+                 sr2,
+                 1 if if_f0_3 else 0,
+                 batch_size12,
+                 gpus16,
+                 total_epoch11,
+                 save_epoch10,
+                 "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
+                 "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
+                 1 if if_save_latest13 == i18n("是") else 0,
+                 1 if if_cache_gpu17 == i18n("是") else 0,
+                 1 if if_save_every_weights18 == i18n("是") else 0,
+                 version19,
+             )
+         )
+     else:
+         cmd = (
+             config.python_cmd
+             + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
+             % (
+                 exp_dir1,
+                 sr2,
+                 1 if if_f0_3 else 0,
+                 batch_size12,
+                 total_epoch11,
+                 save_epoch10,
+                 "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
+                 "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
+                 1 if if_save_latest13 == i18n("是") else 0,
+                 1 if if_cache_gpu17 == i18n("是") else 0,
+                 1 if if_save_every_weights18 == i18n("是") else 0,
+                 version19,
+             )
+         )
+     logging.info(cmd)
+     p = Popen(cmd, shell=True, cwd=now_dir)
+     p.wait()
+     return "Training finished. See the console log or train.log in the experiment folder."
+
+
+ # but4.click(train_index, [exp_dir1], info3)
+ def train_index(exp_dir1, version19):
+     exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
+     os.makedirs(exp_dir, exist_ok=True)
+     feature_dir = (
+         "%s/3_feature256" % (exp_dir)
+         if version19 == "v1"
+         else "%s/3_feature768" % (exp_dir)
+     )
+     if not os.path.exists(feature_dir):
+         return "Please run feature extraction first!"
+     listdir_res = list(os.listdir(feature_dir))
+     if len(listdir_res) == 0:
+         return "Please run feature extraction first!"
+     infos = []
+     npys = []
+     for name in sorted(listdir_res):
+         phone = np.load("%s/%s" % (feature_dir, name))
+         npys.append(phone)
+     big_npy = np.concatenate(npys, 0)
+     big_npy_idx = np.arange(big_npy.shape[0])
+     np.random.shuffle(big_npy_idx)
+     big_npy = big_npy[big_npy_idx]
+     if big_npy.shape[0] > 2e5:
+         # if(1):
+         infos.append("Trying kmeans: reducing %s features to 10k centers." % big_npy.shape[0])
+         yield "\n".join(infos)
+         try:
+             big_npy = (
+                 MiniBatchKMeans(
+                     n_clusters=10000,
+                     verbose=True,
+                     batch_size=256 * config.n_cpu,
+                     compute_labels=False,
+                     init="random",
+                 )
+                 .fit(big_npy)
+                 .cluster_centers_
+             )
+         except:
+             info = traceback.format_exc()
+             logging.info(info)
+             infos.append(info)
+             yield "\n".join(infos)
+
+     np.save("%s/total_fea.npy" % exp_dir, big_npy)
+     n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
+     infos.append("%s,%s" % (big_npy.shape, n_ivf))
+     yield "\n".join(infos)
+     index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
+     # index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,PQ128x4fs,RFlat" % n_ivf)
+     infos.append("training")
+     yield "\n".join(infos)
+     index_ivf = faiss.extract_index_ivf(index)  #
+     index_ivf.nprobe = 1
+     index.train(big_npy)
+     faiss.write_index(
+         index,
+         "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
+         % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
+     )
+     # faiss.write_index(index, '%s/trained_IVF%s_Flat_FastScan_%s.index' % (exp_dir, n_ivf, version19))
+     infos.append("adding")
+     yield "\n".join(infos)
+     batch_size_add = 8192
+     for i in range(0, big_npy.shape[0], batch_size_add):
+         index.add(big_npy[i : i + batch_size_add])
+     faiss.write_index(
+         index,
+         "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
+         % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
+     )
+     infos.append(
+         "Successfully built index: added_IVF%s_Flat_nprobe_%s_%s_%s.index"
+         % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
+     )
+     # faiss.write_index(index, '%s/added_IVF%s_Flat_FastScan_%s.index' % (exp_dir, n_ivf, version19))
+     # infos.append("Successfully built index: added_IVF%s_Flat_FastScan_%s.index" % (n_ivf, version19))
+     yield "\n".join(infos)
+
+
+ # but5.click(train1key, [exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0method8, save_epoch10, total_epoch11, batch_size12, if_save_latest13, pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17], info3)
+ def train1key(
+     exp_dir1,
+     sr2,
+     if_f0_3,
+     trainset_dir4,
+     spk_id5,
+     np7,
+     f0method8,
+     save_epoch10,
+     total_epoch11,
+     batch_size12,
+     if_save_latest13,
+     pretrained_G14,
+     pretrained_D15,
+     gpus16,
+     if_cache_gpu17,
+     if_save_every_weights18,
+     version19,
+ ):
+     infos = []
+
+     def get_info_str(strr):
+         infos.append(strr)
+         return "\n".join(infos)
+
+     model_log_dir = "%s/logs/%s" % (now_dir, exp_dir1)
+     preprocess_log_path = "%s/preprocess.log" % model_log_dir
+     extract_f0_feature_log_path = "%s/extract_f0_feature.log" % model_log_dir
+     gt_wavs_dir = "%s/0_gt_wavs" % model_log_dir
+     feature_dir = (
+         "%s/3_feature256" % model_log_dir
+         if version19 == "v1"
+         else "%s/3_feature768" % model_log_dir
+     )
+
+     os.makedirs(model_log_dir, exist_ok=True)
+     ######### step 1: preprocess the dataset
+     open(preprocess_log_path, "w").close()
+     cmd = (
+         config.python_cmd
+         + " trainset_preprocess_pipeline_print.py %s %s %s %s "
+         % (trainset_dir4, sr_dict[sr2], np7, model_log_dir)
+         + str(config.noparallel)
+     )
+     yield get_info_str(i18n("step1:正在处理数据"))
+     yield get_info_str(cmd)
+     p = Popen(cmd, shell=True)
+     p.wait()
+     with open(preprocess_log_path, "r") as f:
+         logging.info(f.read())
+     ######### step 2a: extract pitch
+     open(extract_f0_feature_log_path, "w").close()
+     if if_f0_3:
+         yield get_info_str("step2a: extracting pitch")
+         cmd = config.python_cmd + " extract_f0_print.py %s %s %s" % (
+             model_log_dir,
+             np7,
+             f0method8,
+         )
+         yield get_info_str(cmd)
+         p = Popen(cmd, shell=True, cwd=now_dir)
+         p.wait()
+         with open(extract_f0_feature_log_path, "r") as f:
+             logging.info(f.read())
+     else:
+         yield get_info_str(i18n("step2a:无需提取音高"))
+     ####### step 2b: extract features
+     yield get_info_str(i18n("step2b:正在提取特征"))
+     gpus = gpus16.split("-")
+     leng = len(gpus)
+     ps = []
+     for idx, n_g in enumerate(gpus):
+         cmd = config.python_cmd + " extract_feature_print.py %s %s %s %s %s %s" % (
+             config.device,
+             leng,
+             idx,
+             n_g,
+             model_log_dir,
+             version19,
+         )
+         yield get_info_str(cmd)
+         p = Popen(
+             cmd, shell=True, cwd=now_dir
+         )  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
+         ps.append(p)
+     for p in ps:
+         p.wait()
+     with open(extract_f0_feature_log_path, "r") as f:
+         logging.info(f.read())
+     ####### step 3a: train the model
+     yield get_info_str(i18n("step3a:正在训练模型"))
+     # build the filelist
+     if if_f0_3:
+         f0_dir = "%s/2a_f0" % model_log_dir
+         f0nsf_dir = "%s/2b-f0nsf" % model_log_dir
+         names = (
+             set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
+             & set([name.split(".")[0] for name in os.listdir(feature_dir)])
+             & set([name.split(".")[0] for name in os.listdir(f0_dir)])
+             & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
+         )
+     else:
+         names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
+             [name.split(".")[0] for name in os.listdir(feature_dir)]
+         )
+     opt = []
+     for name in names:
+         if if_f0_3:
+             opt.append(
+                 "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
+                 % (
+                     gt_wavs_dir.replace("\\", "\\\\"),
+                     name,
+                     feature_dir.replace("\\", "\\\\"),
+                     name,
+                     f0_dir.replace("\\", "\\\\"),
+                     name,
+                     f0nsf_dir.replace("\\", "\\\\"),
+                     name,
+                     spk_id5,
+                 )
+             )
+         else:
+             opt.append(
+                 "%s/%s.wav|%s/%s.npy|%s"
+                 % (
+                     gt_wavs_dir.replace("\\", "\\\\"),
+                     name,
+                     feature_dir.replace("\\", "\\\\"),
+                     name,
+                     spk_id5,
+                 )
+             )
+     fea_dim = 256 if version19 == "v1" else 768
+     if if_f0_3:
+         for _ in range(2):
+             opt.append(
+                 "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
+                 % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
+             )
+     else:
+         for _ in range(2):
+             opt.append(
+                 "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
+                 % (now_dir, sr2, now_dir, fea_dim, spk_id5)
+             )
+     shuffle(opt)
+     with open("%s/filelist.txt" % model_log_dir, "w") as f:
+         f.write("\n".join(opt))
+     yield get_info_str("write filelist done")
+     if gpus16:
+         cmd = (
+             config.python_cmd
+             + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
+             % (
+                 exp_dir1,
+                 sr2,
+                 1 if if_f0_3 else 0,
+                 batch_size12,
+                 gpus16,
+                 total_epoch11,
+                 save_epoch10,
+                 "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
+                 "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
+                 1 if if_save_latest13 == i18n("是") else 0,
+                 1 if if_cache_gpu17 == i18n("是") else 0,
+                 1 if if_save_every_weights18 == i18n("是") else 0,
+                 version19,
+             )
+         )
+     else:
+         cmd = (
+             config.python_cmd
+             + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
+             % (
+                 exp_dir1,
+                 sr2,
+                 1 if if_f0_3 else 0,
+                 batch_size12,
+                 total_epoch11,
+                 save_epoch10,
+                 "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
+                 "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
+                 1 if if_save_latest13 == i18n("是") else 0,
+                 1 if if_cache_gpu17 == i18n("是") else 0,
+                 1 if if_save_every_weights18 == i18n("是") else 0,
+                 version19,
+             )
+         )
+     yield get_info_str(cmd)
+     p = Popen(cmd, shell=True, cwd=now_dir)
+     p.wait()
+     yield get_info_str(i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"))
+     ####### step 3b: train the index
+     npys = []
+     listdir_res = list(os.listdir(feature_dir))
+     for name in sorted(listdir_res):
+         phone = np.load("%s/%s" % (feature_dir, name))
+         npys.append(phone)
+     big_npy = np.concatenate(npys, 0)
+
+     big_npy_idx = np.arange(big_npy.shape[0])
+     np.random.shuffle(big_npy_idx)
+     big_npy = big_npy[big_npy_idx]
+
+     if big_npy.shape[0] > 2e5:
+         # if(1):
+         info = "Trying kmeans: reducing %s features to 10k centers." % big_npy.shape[0]
+         logging.info(info)
+         yield get_info_str(info)
+         try:
+             big_npy = (
+                 MiniBatchKMeans(
+                     n_clusters=10000,
+                     verbose=True,
+                     batch_size=256 * config.n_cpu,
+                     compute_labels=False,
+                     init="random",
+                 )
+                 .fit(big_npy)
+                 .cluster_centers_
+             )
+         except:
+             info = traceback.format_exc()
+             logging.info(info)
+             yield get_info_str(info)
+
+     np.save("%s/total_fea.npy" % model_log_dir, big_npy)
+
+     # n_ivf = big_npy.shape[0] // 39
+     n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
+     yield get_info_str("%s,%s" % (big_npy.shape, n_ivf))
+     index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
+     yield get_info_str("training index")
+     index_ivf = faiss.extract_index_ivf(index)  #
+     index_ivf.nprobe = 1
+     index.train(big_npy)
+     faiss.write_index(
+         index,
+         "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
+         % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
+     )
+     yield get_info_str("adding index")
+     batch_size_add = 8192
+     for i in range(0, big_npy.shape[0], batch_size_add):
+         index.add(big_npy[i : i + batch_size_add])
+     faiss.write_index(
+         index,
+         "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
+         % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
+     )
+     yield get_info_str(
+         "Successfully built index: added_IVF%s_Flat_nprobe_%s_%s_%s.index"
+         % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
+     )
+     yield get_info_str(i18n("全流程结束!"))
+
+
+ # ckpt_path2.change(change_info_,[ckpt_path2],[sr__,if_f0__])
+ def change_info_(ckpt_path):
+     if not os.path.exists(ckpt_path.replace(os.path.basename(ckpt_path), "train.log")):
+         return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
+     try:
+         with open(
+             ckpt_path.replace(os.path.basename(ckpt_path), "train.log"), "r"
+         ) as f:
+             info = eval(f.read().strip("\n").split("\n")[0].split("\t")[-1])
+             sr, f0 = info["sample_rate"], info["if_f0"]
+             version = "v2" if ("version" in info and info["version"] == "v2") else "v1"
+             return sr, str(f0), version
+     except:
+         traceback.print_exc()
+         return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
+
+
+ def export_onnx(ModelPath, ExportedPath):
+     cpt = torch.load(ModelPath, map_location="cpu")
+     cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
+     vec_channels = 256 if cpt.get("version", "v1") == "v1" else 768
+
+     test_phone = torch.rand(1, 200, vec_channels)  # hidden units
+     test_phone_lengths = torch.tensor([200]).long()  # hidden unit length (appears unused)
+     test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # f0, in Hz
+     test_pitchf = torch.rand(1, 200)  # nsf f0
+     test_ds = torch.LongTensor([0])  # speaker id
+     test_rnd = torch.rand(1, 192, 200)  # noise (adds a random factor)
+
+     device = "cpu"  # device used for export (does not affect inference)
+
+     net_g = SynthesizerTrnMsNSFsidM(
+         *cpt["config"], is_half=False, version=cpt.get("version", "v1")
+     )  # fp32 export (fp16 support in C++ would require manual memory reordering, so it is skipped for now)
+     net_g.load_state_dict(cpt["weight"], strict=False)
+     input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
+     output_names = [
+         "audio",
+     ]
+     # net_g.construct_spkmixmap(n_speaker)  # multi-speaker mix-track export
+     torch.onnx.export(
+         net_g,
+         (
+             test_phone.to(device),
+             test_phone_lengths.to(device),
+             test_pitch.to(device),
+             test_pitchf.to(device),
+             test_ds.to(device),
+             test_rnd.to(device),
+         ),
+         ExportedPath,
+         dynamic_axes={
+             "phone": [1],
+             "pitch": [1],
+             "pitchf": [1],
+             "rnd": [2],
+         },
+         do_constant_folding=False,
+         opset_version=13,
+         verbose=False,
+         input_names=input_names,
+         output_names=output_names,
+     )
+     return "Finished"
+
RVC_class.py ADDED
@@ -0,0 +1,85 @@
+ import RVC
+ from scipy.io import wavfile
+ import numpy as np
+ import os
+ import uuid
+ import io
+ import requests
+
+
+ class VoiceConverter:
+     def __init__(self):
+         self.models = RVC.names
+
+     def single_run(self, input_audio, model_name, vc_transform, song, opt_input="opt", f0_method="crepe",
+                    filter_radius=3, file_index1="", file_index2="", index_rate1=0.75,
+                    resample_sr=0, rms_mix_rate=0.25, protect=0.33, f0_file=None):
+
+         RVC.get_vc(model_name, protect, protect)
+         spk_item = 0
+
+         vc_output1, vc_output2 = RVC.vc_single(
+             spk_item,
+             input_audio,
+             vc_transform,
+             f0_file,
+             f0_method,
+             file_index1,
+             file_index2,
+             index_rate1,
+             filter_radius,
+             resample_sr,
+             rms_mix_rate,
+             protect,
+             song,
+         )
+
+         random_string = str(uuid.uuid4())
+         filename = os.path.basename(input_audio)
+         name, extension = os.path.splitext(filename)
+         new_file_name = f"{name}_{random_string}{extension}"
+
+         sample_rate, audio_data = vc_output2
+         url = self.upload_audio(audio_data, sample_rate, new_file_name)
+
+         return url
+
+     def upload_audio(self, audio_data, sample_rate, filename):
+         try:
+             url = "https://tmpfiles.org/api/v1/upload"
+             # Convert audio data to WAV format in memory
+             wav_bytes = io.BytesIO()
+             scaled_audio_data = np.int16(audio_data)
+             wavfile.write(wav_bytes, sample_rate, scaled_audio_data)
+             wav_bytes.seek(0)
+
+             files = {'file': (filename, wav_bytes)}
+
+             response = requests.post(url, files=files)
+             response.raise_for_status()
+
+             return response.json()
+         except Exception as e:
+             raise RuntimeError(f"Failed to upload audio: {e}")
+
+     def uvr(self, dir_wav_input, wav_inputs=None, model_choose=RVC.uvr5_names[0],
+             opt_vocal_root="opt", opt_ins_root="opt", format0="wav"):
+
+         agg = 10
+         # RVC.uvr takes the input path first, then the model name
+         # (the original call passed them the other way around)
+         vc_output4 = RVC.uvr(
+             dir_wav_input,
+             model_choose,
+             opt_vocal_root,
+             wav_inputs,
+             opt_ins_root,
+             agg,
+             format0,
+         )
+
+         # RVC.uvr returns None in this repo, so guard before iterating
+         if vc_output4:
+             for value in vc_output4:
+                 print(value)
+
+
+ # converter.uvr('/home/teewhy/Downloads/around_the_world-atc.wav')
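A hypothetical end-to-end call of the class above; the model name and input URL are illustrative, and the model is expected to live under `weights/` per RVC's `weight_root`:

    converter = VoiceConverter()
    url = converter.single_run(
        "https://example.com/song.mp3",  # hypothetical source audio URL
        "mymodel.pth",                   # hypothetical model under weights/
        0,      # vc_transform: pitch shift in semitones
        True,   # song: separate vocals with UVR before conversion
    )
    print(url)  # JSON response returned by upload_audio (tmpfiles.org)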
RVC_functions.py ADDED
@@ -0,0 +1,76 @@
+ import RVC
+ from scipy.io import wavfile
+ import numpy as np
+ import os
+ import uuid
+ import io
+ import requests
+
+
+ def get_models():
+     return RVC.names
+
+
+ def single_run(input_audio0, model_name, vc_transform, opt_input="opt", f0method0="pm", filter_radius0=3, file_index1="", file_index2="",
+                index_rate1=0.75, resample_sr0=0, rms_mix_rate0=0.25, protect0=0.33, f0_file=None):
+
+     RVC.get_vc(model_name, protect0, protect0)
+     spk_item = 0
+     # file_index2 = sorted(index_paths)
+     # f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"))
+
+     vc_output1, vc_output2 = RVC.vc_single(
+         spk_item,
+         input_audio0,
+         vc_transform,
+         f0_file,
+         f0method0,
+         file_index1,
+         file_index2,
+         index_rate1,
+         filter_radius0,
+         resample_sr0,
+         rms_mix_rate0,
+         protect0,
+         False,  # song; vc_single requires this argument (False assumed: no separation)
+     )
+     random_string = str(uuid.uuid4())
+     filename = os.path.basename(input_audio0)
+     name, extension = os.path.splitext(filename)
+     new_file_name = f"{name}_{random_string}{extension}"
+
+     sample_rate, audio_data = vc_output2
+     # scaled_audio_data = np.int16(audio_data)
+     # wavfile.write(new_file_name, sample_rate, scaled_audio_data)
+     # response = requests.post(f"https://filebin.net/dajeii61xk3c4oxi/{new_file_name}", files=files)
+     url = "https://filebin.net/dajeii61xk3c4oxi"
+     print(upload_audio(url, audio_data, sample_rate, new_file_name))
+
+     return f"{url}/{new_file_name}"
+
+
+ def upload_audio(url, audio_data, sample_rate, filename):
+     try:
+         # Convert audio data to WAV format in memory
+         wav_bytes = io.BytesIO()
+         scaled_audio_data = np.int16(audio_data)
+         wavfile.write(wav_bytes, sample_rate, scaled_audio_data)
+         wav_bytes.seek(0)
+
+         files = {'file': (filename, wav_bytes)}
+
+         response = requests.post(f"{url}/{filename}", files=files)
+
+         response.raise_for_status()
+
+         return response.json()
+     except Exception as e:
+         raise RuntimeError(f"Failed to upload audio: {e}")
+
+
+ def uvr(dir_wav_input, wav_inputs=None, model_choose=RVC.uvr5_names[0], opt_vocal_root="opt", opt_ins_root="opt", format0="wav"):
+
+     vc_output4 = RVC.uvr(
+         dir_wav_input,
+     )
+     # RVC.uvr returns None in this repo, so guard before iterating
+     if vc_output4:
+         for value in vc_output4:
+             print(value)
+
+
+ print(single_run("https://www.learningcontainer.com/wp-content/uploads/2020/02/Kalimba.mp3", "mymodelbilawal.pth", -2))
Retrieval_based_Voice_Conversion_WebUI.ipynb ADDED
@@ -0,0 +1,381 @@
+ {
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "private_outputs": true,
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "accelerator": "GPU",
+ "gpuClass": "standard"
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)"
+ ],
+ "metadata": {
+ "id": "ZFFCx5J80SGa"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "GmFP6bN9dvOq"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Check the GPU\n",
+ "!nvidia-smi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Install dependencies\n",
+ "!apt-get -y install build-essential python3-dev ffmpeg\n",
+ "!pip3 install --upgrade setuptools wheel\n",
+ "!pip3 install --upgrade pip\n",
+ "!pip3 install faiss-cpu==1.7.2 fairseq gradio==3.14.0 ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2"
+ ],
+ "metadata": {
+ "id": "wjddIFr1oS3W"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Clone the repository\n",
+ "\n",
+ "!git clone --depth=1 -b stable https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI\n",
+ "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
+ "!mkdir -p pretrained uvr5_weights"
+ ],
+ "metadata": {
+ "id": "ge_97mfpgqTm"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Update the repository (usually not needed)\n",
+ "!git pull"
+ ],
+ "metadata": {
+ "id": "BLDEZADkvlw1"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Install aria2\n",
+ "!apt -y install -qq aria2"
+ ],
+ "metadata": {
+ "id": "pqE0PrnuRqI2"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Download the pretrained base models\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G32k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G40k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G48k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D32k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D40k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth"
+ ],
+ "metadata": {
+ "id": "UG3XpUwEomUz"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Download the vocal separation models\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth"
+ ],
+ "metadata": {
+ "id": "HugjmZqZRuiF"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Download hubert_base\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt"
+ ],
+ "metadata": {
+ "id": "2RCaT9FTR0ej"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Mount Google Drive\n",
+ "\n",
+ "from google.colab import drive\n",
+ "drive.mount('/content/drive')"
+ ],
+ "metadata": {
+ "id": "jwu07JgqoFON"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Load the packed dataset from Google Drive into /content/dataset\n",
+ "\n",
+ "#@markdown Dataset location\n",
+ "DATASET = \"/content/drive/MyDrive/dataset/lulu20230327_32k.zip\" #@param {type:\"string\"}\n",
+ "\n",
+ "!mkdir -p /content/dataset\n",
+ "!unzip -d /content/dataset -B {DATASET}"
+ ],
+ "metadata": {
+ "id": "Mwk7Q0Loqzjx"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Rename duplicate filenames in the dataset\n",
+ "!ls -a /content/dataset/\n",
+ "!rename 's/(\\w+)\\.(\\w+)~(\\d*)/$1_$3.$2/' /content/dataset/*.*~*"
+ ],
+ "metadata": {
+ "id": "PDlFxWHWEynD"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Launch the web UI\n",
+ "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
+ "# %load_ext tensorboard\n",
+ "# %tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n",
+ "!python3 infer-web.py --colab --pycmd python3"
+ ],
+ "metadata": {
+ "id": "7vh6vphDwO0b"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Manually back up trained model files to Google Drive\n",
+ "#@markdown Check the model filenames under the logs folder yourself and edit the filenames at the end of the commands below\n",
+ "\n",
+ "#@markdown Model name\n",
+ "MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
+ "#@markdown Model epoch\n",
+ "MODELEPOCH = 9600 #@param {type:\"integer\"}\n",
+ "\n",
+ "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n",
+ "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n",
+ "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/added_*.index /content/drive/MyDrive/\n",
+ "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/\n",
+ "\n",
+ "!cp /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth"
+ ],
+ "metadata": {
+ "id": "FgJuNeAwx5Y_"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Restore pth files from Google Drive\n",
+ "#@markdown Check the model filenames under the logs folder yourself and edit the filenames at the end of the commands below\n",
+ "\n",
+ "#@markdown Model name\n",
+ "MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
+ "#@markdown Model epoch\n",
+ "MODELEPOCH = 7500 #@param {type:\"integer\"}\n",
+ "\n",
+ "!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
+ "\n",
+ "!cp /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
+ "!cp /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n",
+ "!cp /content/drive/MyDrive/*.index /content/\n",
+ "!cp /content/drive/MyDrive/*.npy /content/\n",
+ "!cp /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth"
+ ],
+ "metadata": {
+ "id": "OVQoLQJXS7WX"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Manual preprocessing (not recommended)\n",
+ "#@markdown Model name\n",
+ "MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
+ "#@markdown Sample rate\n",
+ "BITRATE = 48000 #@param {type:\"integer\"}\n",
+ "#@markdown Number of processes\n",
+ "THREADCOUNT = 8 #@param {type:\"integer\"}\n",
+ "\n",
+ "!python3 trainset_preprocess_pipeline_print.py /content/dataset {BITRATE} {THREADCOUNT} logs/{MODELNAME} True\n"
+ ],
+ "metadata": {
+ "id": "ZKAyuKb9J6dz"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Manual feature extraction (not recommended)\n",
+ "#@markdown Model name\n",
+ "MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
+ "#@markdown Number of processes\n",
+ "THREADCOUNT = 8 #@param {type:\"integer\"}\n",
+ "#@markdown Pitch extraction algorithm\n",
+ "ALGO = \"harvest\" #@param {type:\"string\"}\n",
+ "\n",
+ "!python3 extract_f0_print.py logs/{MODELNAME} {THREADCOUNT} {ALGO}\n",
+ "\n",
+ "!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME}\n"
+ ],
+ "metadata": {
+ "id": "CrxJqzAUKmPJ"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Manual training (not recommended)\n",
+ "#@markdown Model name\n",
+ "MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
+ "#@markdown GPU(s) to use\n",
+ "USEGPU = \"0\" #@param {type:\"string\"}\n",
+ "#@markdown Batch size\n",
+ "BATCHSIZE = 32 #@param {type:\"integer\"}\n",
+ "#@markdown Stop at epoch\n",
+ "MODELEPOCH = 3200 #@param {type:\"integer\"}\n",
+ "#@markdown Save interval in epochs\n",
+ "EPOCHSAVE = 100 #@param {type:\"integer\"}\n",
+ "#@markdown Sample rate\n",
+ "MODELSAMPLE = \"48k\" #@param {type:\"string\"}\n",
+ "#@markdown Cache the training set\n",
+ "CACHEDATA = 1 #@param {type:\"integer\"}\n",
+ "#@markdown Keep only the latest ckpt file\n",
+ "ONLYLATEST = 0 #@param {type:\"integer\"}\n",
+ "\n",
+ "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e {MODELNAME} -sr {MODELSAMPLE} -f0 1 -bs {BATCHSIZE} -g {USEGPU} -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained/f0G{MODELSAMPLE}.pth -pd pretrained/f0D{MODELSAMPLE}.pth -l {ONLYLATEST} -c {CACHEDATA}\n"
+ ],
+ "metadata": {
+ "id": "IMLPLKOaKj58"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Delete all other pth files, keeping only the selected one (careful: read the code first)\n",
+ "#@markdown Model name\n",
+ "MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
+ "#@markdown Selected model epoch\n",
+ "MODELEPOCH = 9600 #@param {type:\"integer\"}\n",
+ "\n",
+ "!echo \"Backing up the selected model...\"\n",
+ "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
+ "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth\n",
+ "\n",
+ "!echo \"Deleting...\"\n",
+ "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
+ "!rm /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*.pth\n",
+ "\n",
+ "!echo \"Restoring the selected model...\"\n",
+ "!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
+ "!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n",
+ "\n",
+ "!echo \"Deletion finished\"\n",
+ "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}"
+ ],
+ "metadata": {
+ "id": "haYA81hySuDl"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Remove all files under the project, keeping only the selected model (careful: read the code first)\n",
+ "#@markdown Model name\n",
+ "MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
+ "#@markdown Selected model epoch\n",
+ "MODELEPOCH = 9600 #@param {type:\"integer\"}\n",
+ "\n",
+ "!echo \"Backing up the selected model...\"\n",
+ "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
+ "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth\n",
+ "\n",
+ "!echo \"Deleting...\"\n",
+ "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
+ "!rm -rf /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*\n",
+ "\n",
+ "!echo \"Restoring the selected model...\"\n",
+ "!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
+ "!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n",
+ "\n",
+ "!echo \"Deletion finished\"\n",
+ "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}"
+ ],
+ "metadata": {
+ "id": "QhSiPTVPoIRh"
+ },
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+ }
Retrieval_based_Voice_Conversion_WebUI_v2.ipynb ADDED
@@ -0,0 +1,401 @@
+ {
+ "cells": [
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ZFFCx5J80SGa"
+ },
+ "source": [
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "GmFP6bN9dvOq"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Check the GPU\n",
+ "!nvidia-smi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "wjddIFr1oS3W"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Install dependencies\n",
+ "!apt-get -y install build-essential python3-dev ffmpeg\n",
+ "!pip3 install --upgrade setuptools wheel\n",
+ "!pip3 install --upgrade pip\n",
+ "!pip3 install faiss-cpu==1.7.2 fairseq gradio==3.14.0 ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "ge_97mfpgqTm"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Clone the repository\n",
+ "\n",
+ "!mkdir Retrieval-based-Voice-Conversion-WebUI\n",
+ "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
+ "!git init\n",
+ "!git remote add origin https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git\n",
+ "!git fetch origin cfd984812804ddc9247d65b14c82cd32e56c1133 --depth=1\n",
+ "!git reset --hard FETCH_HEAD"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "BLDEZADkvlw1"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Update the repository (usually not needed)\n",
+ "!git pull"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "pqE0PrnuRqI2"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Install aria2\n",
+ "!apt -y install -qq aria2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "UG3XpUwEomUz"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Download the pretrained base models\n",
+ "\n",
+ "# v1\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G32k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G40k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G48k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D32k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D40k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth\n",
+ "\n",
+ "# v2\n",
+ "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o D32k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o D40k.pth\n",
+ "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o D48k.pth\n",
+ "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o G32k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o G40k.pth\n",
+ "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o G48k.pth\n",
+ "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0D32k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0D40k.pth\n",
+ "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0D48k.pth\n",
+ "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0G32k.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0G40k.pth\n",
+ "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0G48k.pth"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "HugjmZqZRuiF"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Download the vocal separation models\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "2RCaT9FTR0ej"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Download hubert_base\n",
+ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "jwu07JgqoFON"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Mount Google Drive\n",
+ "\n",
+ "from google.colab import drive\n",
+ "drive.mount('/content/drive')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Mwk7Q0Loqzjx"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Load the packed dataset from Google Drive into /content/dataset\n",
+ "\n",
+ "#@markdown Dataset location\n",
+ "DATASET = \"/content/drive/MyDrive/dataset/lulu20230327_32k.zip\" #@param {type:\"string\"}\n",
+ "\n",
+ "!mkdir -p /content/dataset\n",
+ "!unzip -d /content/dataset -B {DATASET}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "PDlFxWHWEynD"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Rename duplicate filenames in the dataset\n",
+ "!ls -a /content/dataset/\n",
+ "!rename 's/(\\w+)\\.(\\w+)~(\\d*)/$1_$3.$2/' /content/dataset/*.*~*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "7vh6vphDwO0b"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Launch the web UI\n",
+ "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
+ "# %load_ext tensorboard\n",
+ "# %tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n",
+ "!python3 infer-web.py --colab --pycmd python3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "FgJuNeAwx5Y_"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Manually back up trained model files to Google Drive\n",
+ "#@markdown Check the model filenames under the logs folder yourself and edit the filenames at the end of the commands below\n",
+ "\n",
+ "#@markdown Model name\n",
+ "MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
+ "#@markdown Model epoch\n",
+ "MODELEPOCH = 9600 #@param {type:\"integer\"}\n",
+ "\n",
+ "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n",
+ "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n",
+ "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/added_*.index /content/drive/MyDrive/\n",
+ "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/\n",
+ "\n",
+ "!cp /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "OVQoLQJXS7WX"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Restore pth files from Google Drive\n",
+ "#@markdown Check the model filenames under the logs folder yourself and edit the filenames at the end of the commands below\n",
+ "\n",
+ "#@markdown Model name\n",
+ "MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
+ "#@markdown Model epoch\n",
+ "MODELEPOCH = 7500 #@param {type:\"integer\"}\n",
+ "\n",
+ "!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
+ "\n",
+ "!cp /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
+ "!cp /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n",
+ "!cp /content/drive/MyDrive/*.index /content/\n",
+ "!cp /content/drive/MyDrive/*.npy /content/\n",
+ "!cp /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "ZKAyuKb9J6dz"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Manual preprocessing (not recommended)\n",
+ "#@markdown Model name\n",
+ "MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
+ "#@markdown Sample rate\n",
+ "BITRATE = 48000 #@param {type:\"integer\"}\n",
+ "#@markdown Number of processes\n",
+ "THREADCOUNT = 8 #@param {type:\"integer\"}\n",
+ "\n",
+ "!python3 trainset_preprocess_pipeline_print.py /content/dataset {BITRATE} {THREADCOUNT} logs/{MODELNAME} True\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "CrxJqzAUKmPJ"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Manual feature extraction (not recommended)\n",
+ "#@markdown Model name\n",
+ "MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
+ "#@markdown Number of processes\n",
+ "THREADCOUNT = 8 #@param {type:\"integer\"}\n",
+ "#@markdown Pitch extraction algorithm\n",
+ "ALGO = \"harvest\" #@param {type:\"string\"}\n",
+ "\n",
+ "!python3 extract_f0_print.py logs/{MODELNAME} {THREADCOUNT} {ALGO}\n",
+ "\n",
+ "!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME}\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "IMLPLKOaKj58"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Manual training (not recommended)\n",
+ "#@markdown Model name\n",
+ "MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
+ "#@markdown GPU(s) to use\n",
+ "USEGPU = \"0\" #@param {type:\"string\"}\n",
+ "#@markdown Batch size\n",
+ "BATCHSIZE = 32 #@param {type:\"integer\"}\n",
+ "#@markdown Stop at epoch\n",
+ "MODELEPOCH = 3200 #@param {type:\"integer\"}\n",
+ "#@markdown Save interval in epochs\n",
+ "EPOCHSAVE = 100 #@param {type:\"integer\"}\n",
+ "#@markdown Sample rate\n",
+ "MODELSAMPLE = \"48k\" #@param {type:\"string\"}\n",
+ "#@markdown Cache the training set\n",
+ "CACHEDATA = 1 #@param {type:\"integer\"}\n",
+ "#@markdown Keep only the latest ckpt file\n",
+ "ONLYLATEST = 0 #@param {type:\"integer\"}\n",
+ "\n",
+ "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e {MODELNAME} -sr {MODELSAMPLE} -f0 1 -bs {BATCHSIZE} -g {USEGPU} -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained/f0G{MODELSAMPLE}.pth -pd pretrained/f0D{MODELSAMPLE}.pth -l {ONLYLATEST} -c {CACHEDATA}\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "haYA81hySuDl"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Delete all other pth files, keeping only the selected one (careful: read the code first)\n",
+ "#@markdown Model name\n",
+ "MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
+ "#@markdown Selected model epoch\n",
+ "MODELEPOCH = 9600 #@param {type:\"integer\"}\n",
+ "\n",
+ "!echo \"Backing up the selected model...\"\n",
+ "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
+ "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth\n",
+ "\n",
+ "!echo \"Deleting...\"\n",
+ "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
+ "!rm /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*.pth\n",
+ "\n",
+ "!echo \"Restoring the selected model...\"\n",
+ "!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
+ "!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n",
+ "\n",
+ "!echo \"Deletion finished\"\n",
+ "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "QhSiPTVPoIRh"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Remove all files under the project, keeping only the selected model (careful: read the code first)\n",
+ "#@markdown Model name\n",
+ "MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
+ "#@markdown Selected model epoch\n",
+ "MODELEPOCH = 9600 #@param {type:\"integer\"}\n",
+ "\n",
+ "!echo \"Backing up the selected model...\"\n",
+ "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
+ "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth\n",
+ "\n",
+ "!echo \"Deleting...\"\n",
+ "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
+ "!rm -rf /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*\n",
+ "\n",
+ "!echo \"Restoring the selected model...\"\n",
+ "!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
+ "!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n",
+ "\n",
+ "!echo \"Deletion finished\"\n",
+ "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "private_outputs": true,
+ "provenance": []
+ },
+ "gpuClass": "standard",
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+ }
app.py ADDED
@@ -0,0 +1,322 @@
+ import io
+ import os
+ import torch
+
+ # os.system("wget -P cvec/ https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt")
+ import gradio as gr
+ import librosa
+ import numpy as np
+ import soundfile
+ import logging
+ from fairseq import checkpoint_utils
+ from my_utils import load_audio
+ from vc_infer_pipeline import VC
+ import traceback
+ from config import Config
+ from infer_pack.models import (
+     SynthesizerTrnMs256NSFsid,
+     SynthesizerTrnMs256NSFsid_nono,
+     SynthesizerTrnMs768NSFsid,
+     SynthesizerTrnMs768NSFsid_nono,
+ )
+ from i18n import I18nAuto
+
+ logging.getLogger("numba").setLevel(logging.WARNING)
+ logging.getLogger("markdown_it").setLevel(logging.WARNING)
+ logging.getLogger("urllib3").setLevel(logging.WARNING)
+ logging.getLogger("matplotlib").setLevel(logging.WARNING)
+
+ i18n = I18nAuto()
+ i18n.print()
+
+ config = Config()
+
+ weight_root = "weights"
+ weight_uvr5_root = "uvr5_weights"
+ index_root = "logs"
+ names = []
+ hubert_model = None
+ for name in os.listdir(weight_root):
+     if name.endswith(".pth"):
+         names.append(name)
+ index_paths = []
+ for root, dirs, files in os.walk(index_root, topdown=False):
+     for name in files:
+         if name.endswith(".index") and "trained" not in name:
+             index_paths.append("%s/%s" % (root, name))
+
+
+ def get_vc(sid):
+     global n_spk, tgt_sr, net_g, vc, cpt, version
+     if sid == "" or sid == []:
+         global hubert_model
+         if hubert_model is not None:  # polling may switch sid from a loaded model to none, so check for that here
+             print("clean_empty_cache")
+             del net_g, n_spk, vc, hubert_model, tgt_sr  # ,cpt
+             hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None
+             if torch.cuda.is_available():
+                 torch.cuda.empty_cache()
+             ### the rebuild-and-delete below is needed, otherwise the cache is not cleaned completely
+             if_f0 = cpt.get("f0", 1)
+             version = cpt.get("version", "v1")
+             if version == "v1":
+                 if if_f0 == 1:
+                     net_g = SynthesizerTrnMs256NSFsid(
+                         *cpt["config"], is_half=config.is_half
+                     )
+                 else:
+                     net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
+             elif version == "v2":
+                 if if_f0 == 1:
+                     net_g = SynthesizerTrnMs768NSFsid(
+                         *cpt["config"], is_half=config.is_half
+                     )
+                 else:
+                     net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
+             del net_g, cpt
+             if torch.cuda.is_available():
+                 torch.cuda.empty_cache()
+             cpt = None
+         return {"visible": False, "__type__": "update"}
+     person = "%s/%s" % (weight_root, sid)
+     print("loading %s" % person)
+     cpt = torch.load(person, map_location="cpu")
+     tgt_sr = cpt["config"][-1]
+     cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
+     if_f0 = cpt.get("f0", 1)
+     version = cpt.get("version", "v1")
+     if version == "v1":
+         if if_f0 == 1:
+             net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
+         else:
+             net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
+     elif version == "v2":
+         if if_f0 == 1:
+             net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
+         else:
+             net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
+     del net_g.enc_q
+     print(net_g.load_state_dict(cpt["weight"], strict=False))
+     net_g.eval().to(config.device)
+     if config.is_half:
+         net_g = net_g.half()
+     else:
+         net_g = net_g.float()
+     vc = VC(tgt_sr, config)
+     n_spk = cpt["config"][-3]
+     return {"visible": True, "maximum": n_spk, "__type__": "update"}
+
+
+ def load_hubert():
+     global hubert_model
+     models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
+         ["hubert_base.pt"],
+         suffix="",
+     )
+     hubert_model = models[0]
+     hubert_model = hubert_model.to(config.device)
+     if config.is_half:
+         hubert_model = hubert_model.half()
+     else:
+         hubert_model = hubert_model.float()
+     hubert_model.eval()
+
+
+ def vc_single(
+     sid,
+     input_audio_path,
+     f0_up_key,
+     f0_file,
+     f0_method,
+     file_index,
+     file_index2,
+     # file_big_npy,
+     index_rate,
+     filter_radius,
+     resample_sr,
+     rms_mix_rate,
+     protect,
+ ):  # spk_item, input_audio0, vc_transform0,f0_file,f0method0
+     global tgt_sr, net_g, vc, hubert_model, version
+     if input_audio_path is None:
+         return "You need to upload an audio", None
+     f0_up_key = int(f0_up_key)
+     try:
+         audio = input_audio_path[1] / 32768.0
+         if len(audio.shape) == 2:
+             audio = np.mean(audio, -1)
+         audio = librosa.resample(audio, orig_sr=input_audio_path[0], target_sr=16000)
+         audio_max = np.abs(audio).max() / 0.95
+         if audio_max > 1:
+             audio /= audio_max
+         times = [0, 0, 0]
+         if hubert_model is None:
+             load_hubert()
+         if_f0 = cpt.get("f0", 1)
+         file_index = (
+             (
+                 file_index.strip(" ")
+                 .strip('"')
+                 .strip("\n")
+                 .strip('"')
+                 .strip(" ")
+                 .replace("trained", "added")
+             )
+             if file_index != ""
+             else file_index2
+         )  # guard against user typos: automatically swap "trained" for "added"
+         # file_big_npy = (
+         #     file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
+         # )
+         audio_opt = vc.pipeline(
+             hubert_model,
+             net_g,
+             sid,
+             audio,
+             input_audio_path,
+             times,
+             f0_up_key,
+             f0_method,
+             file_index,
+             # file_big_npy,
+             index_rate,
+             if_f0,
+             filter_radius,
+             tgt_sr,
+             resample_sr,
+             rms_mix_rate,
+             version,
+             protect,
+             f0_file=f0_file,
+         )
+         if resample_sr >= 16000 and tgt_sr != resample_sr:
+             tgt_sr = resample_sr
+         index_info = (
+             "Using index:%s." % file_index
+             if os.path.exists(file_index)
+             else "Index not used."
+         )
+         return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
+             index_info,
+             times[0],
+             times[1],
+             times[2],
+         ), (tgt_sr, audio_opt)
+     except:
+         info = traceback.format_exc()
+         print(info)
+         return info, (None, None)
+
+
+ app = gr.Blocks()
+ with app:
+     with gr.Tabs():
+         with gr.TabItem("Online demo"):
+             gr.Markdown(
+                 value="""
+ RVC online demo
+ """
+             )
+             sid = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names))
+             with gr.Column():
+                 spk_item = gr.Slider(
+                     minimum=0,
+                     maximum=2333,
+                     step=1,
+                     label=i18n("请选择说话人id"),
+                     value=0,
+                     visible=False,
+                     interactive=True,
+                 )
+             sid.change(
+                 fn=get_vc,
+                 inputs=[sid],
+                 outputs=[spk_item],
+             )
+             gr.Markdown(
+                 value=i18n("男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ")
+             )
+             vc_input3 = gr.Audio(label="Upload audio (shorter than 90 seconds)")
+             vc_transform0 = gr.Number(label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0)
+             f0method0 = gr.Radio(
+                 label=i18n("选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"),
+                 choices=["pm", "harvest", "crepe"],
+                 value="pm",
+                 interactive=True,
+             )
+             filter_radius0 = gr.Slider(
+                 minimum=0,
+                 maximum=7,
+                 label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"),
+                 value=3,
+                 step=1,
+                 interactive=True,
+             )
+             with gr.Column():
+                 file_index1 = gr.Textbox(
+                     label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
+                     value="",
+                     interactive=False,
+                     visible=False,
+                 )
+                 file_index2 = gr.Dropdown(
+                     label=i18n("自动检测index路径,下拉式选择(dropdown)"),
+                     choices=sorted(index_paths),
+                     interactive=True,
+                 )
+             index_rate1 = gr.Slider(
+                 minimum=0,
+                 maximum=1,
+                 label=i18n("检索特征占比"),
+                 value=0.88,
+                 interactive=True,
+             )
+             resample_sr0 = gr.Slider(
+                 minimum=0,
+                 maximum=48000,
+                 label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
+                 value=0,
+                 step=1,
+                 interactive=True,
+             )
+             rms_mix_rate0 = gr.Slider(
+                 minimum=0,
+                 maximum=1,
+                 label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
+                 value=1,
+                 interactive=True,
+             )
+             protect0 = gr.Slider(
+                 minimum=0,
+                 maximum=0.5,
+                 label=i18n("保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"),
+                 value=0.33,
+                 step=0.01,
+                 interactive=True,
+             )
+             f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"))
+             but0 = gr.Button(i18n("转换"), variant="primary")
+             vc_output1 = gr.Textbox(label=i18n("输出信息"))
+             vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)"))
+             but0.click(
+                 vc_single,
+                 [
+                     spk_item,
+                     vc_input3,
+                     vc_transform0,
+                     f0_file,
+                     f0method0,
+                     file_index1,
+                     file_index2,
+                     # file_big_npy1,
+                     index_rate1,
+                     filter_radius0,
+                     resample_sr0,
+                     rms_mix_rate0,
+                     protect0,
+                 ],
+                 [vc_output1, vc_output2],
+             )
+
+
+ app.launch()
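A minimal sketch of driving vc_single from app.py without the Gradio UI, assuming hubert_base.pt and a model under weights/ are present. gr.Audio hands vc_single a (sample_rate, int16 ndarray) tuple, which is reproduced here with scipy (an import this sketch adds; the model name is a placeholder):

    from scipy.io import wavfile

    get_vc("mymodel.pth")                 # load a model from weights/ (name assumed)
    sr, data = wavfile.read("input.wav")  # int16 PCM, as gr.Audio delivers it
    msg, (out_sr, out) = vc_single(
        0, (sr, data), 0, None, "pm", "", "", 0.88, 3, 0, 1, 0.33
    )
    print(msg)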
config.py ADDED
@@ -0,0 +1,123 @@
+ import argparse
+ import torch
+ from multiprocessing import cpu_count
+
+
+ def config_file_change_fp32():
+     for config_file in ["32k.json", "40k.json", "48k.json"]:
+         with open(f"configs/{config_file}", "r") as f:
+             strr = f.read().replace("true", "false")
+         with open(f"configs/{config_file}", "w") as f:
+             f.write(strr)
+     with open("trainset_preprocess_pipeline_print.py", "r") as f:
+         strr = f.read().replace("3.7", "3.0")
+     with open("trainset_preprocess_pipeline_print.py", "w") as f:
+         f.write(strr)
+
+
+ class Config:
+     def __init__(self):
+         self.device = "cuda:0"
+         self.is_half = True
+         self.n_cpu = 0
+         self.gpu_name = None
+         self.gpu_mem = None
+
+         self.python_cmd = "python3"
+         self.listen_port = 7865
+         self.iscolab = False
+         self.noparallel = False
+         self.noautoopen = False
+
+         self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
+
+     @staticmethod
+     def arg_parse() -> tuple:
+         parser = argparse.ArgumentParser()
+         parser.add_argument("--port", type=int, default=7865, help="Listen port")
+         parser.add_argument(
+             "--pycmd", type=str, default="python3", help="Python command"
+         )
+         parser.add_argument("--colab", action="store_true", help="Launch in colab")
+         parser.add_argument(
+             "--noparallel", action="store_true", help="Disable parallel processing"
+         )
+         parser.add_argument(
+             "--noautoopen",
+             action="store_true",
+             help="Do not open in browser automatically",
+         )
+         cmd_opts = parser.parse_args()
+
+         cmd_opts.port = cmd_opts.port if 0 <= cmd_opts.port <= 65535 else 7865
+
+         return (
+             cmd_opts.pycmd,
+             cmd_opts.port,
+             cmd_opts.colab,
+             cmd_opts.noparallel,
+             cmd_opts.noautoopen,
+         )
+
+     def device_config(self) -> tuple:
+         if torch.cuda.is_available():
+             i_device = int(self.device.split(":")[-1])
+             self.gpu_name = torch.cuda.get_device_name(i_device)
+             if (
+                 ("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
+                 or "P40" in self.gpu_name.upper()
+                 or "1060" in self.gpu_name
+                 or "1070" in self.gpu_name
+                 or "1080" in self.gpu_name
+             ):
+                 print("16-series/10-series GPUs and the P40 are forced to single precision")
+                 self.is_half = False
+                 config_file_change_fp32()
+             else:
+                 self.gpu_name = None
+             self.gpu_mem = int(
+                 torch.cuda.get_device_properties(i_device).total_memory
+                 / 1024
+                 / 1024
+                 / 1024
+                 + 0.4
+             )
+             if self.gpu_mem <= 4:
+                 with open("trainset_preprocess_pipeline_print.py", "r") as f:
+                     strr = f.read().replace("3.7", "3.0")
+                 with open("trainset_preprocess_pipeline_print.py", "w") as f:
+                     f.write(strr)
+         elif torch.backends.mps.is_available():
+             print("No supported NVIDIA GPU found, using MPS for inference")
+             self.device = "mps"
+             self.is_half = False
+             config_file_change_fp32()
+         else:
+             print("No supported NVIDIA GPU found, using CPU for inference")
+             self.device = "cpu"
+             self.is_half = False
+             config_file_change_fp32()
+
+         if self.n_cpu == 0:
+             self.n_cpu = cpu_count()
+
+         if self.is_half:
+             # config for 6 GB VRAM
+             x_pad = 3
+             x_query = 10
+             x_center = 60
+             x_max = 65
+         else:
+             # config for 5 GB VRAM
+             x_pad = 1
+             x_query = 6
+             x_center = 38
+             x_max = 41
+
+         if self.gpu_mem is not None and self.gpu_mem <= 4:
+             x_pad = 1
+             x_query = 5
+             x_center = 30
+             x_max = 32
+
+         return x_pad, x_query, x_center, x_max
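As a quick illustration of how the rest of the codebase consumes this class (a sketch; the printed values depend on the machine it runs on):

    config = Config()                      # device_config() runs inside __init__
    print(config.device, config.is_half)   # e.g. "cuda:0 True" on a supported GPU
    print(config.x_pad, config.x_query, config.x_center, config.x_max)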
envfilescheck.bat ADDED
@@ -0,0 +1,348 @@
+ @echo off && chcp 65001
+
+ echo working dir is %cd%
+ echo checking for the required downloader aria2.
+ echo=
+ dir /a:d/b | findstr "aria2" > flag.txt
+ findstr "aria2" flag.txt >nul
+ if %errorlevel% ==0 (
+ echo aria2 checked.
+ echo=
+ ) else (
+ echo failed. please download aria2 from its webpage!
+ echo unzip it and put it in this directory!
+ timeout /T 5
+ start https://github.com/aria2/aria2/releases/tag/release-1.36.0
+ echo=
+ goto end
+ )
+
+ echo envfiles check starting.
+ echo=
+
+ for /f %%x in ('findstr /i /c:"aria2" "flag.txt"') do (set aria2=%%x)&goto endSch
+ :endSch
+
+ set d32=f0D32k.pth
+ set d40=f0D40k.pth
+ set d48=f0D48k.pth
+ set g32=f0G32k.pth
+ set g40=f0G40k.pth
+ set g48=f0G48k.pth
+
+ set d40v2=f0D40k.pth
+ set g40v2=f0G40k.pth
+
+ set dld32=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth
+ set dld40=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth
+ set dld48=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth
+ set dlg32=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth
+ set dlg40=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth
+ set dlg48=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth
+
+ set dld40v2=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D40k.pth
+ set dlg40v2=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G40k.pth
+
+ set hp2_all=HP2_all_vocals.pth
+ set hp3_all=HP3_all_vocals.pth
+ set hp5_only=HP5_only_main_vocal.pth
+ set VR_DeEchoAggressive=VR-DeEchoAggressive.pth
+ set VR_DeEchoDeReverb=VR-DeEchoDeReverb.pth
+ set VR_DeEchoNormal=VR-DeEchoNormal.pth
+ set onnx_dereverb=vocals.onnx
+
+ set dlhp2_all=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2_all_vocals.pth
+ set dlhp3_all=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP3_all_vocals.pth
+ set dlhp5_only=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5_only_main_vocal.pth
+ set dlVR_DeEchoAggressive=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoAggressive.pth
+ set dlVR_DeEchoDeReverb=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoDeReverb.pth
+ set dlVR_DeEchoNormal=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoNormal.pth
+ set dlonnx_dereverb=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx
+
+ set hb=hubert_base.pt
+
+ set dlhb=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt
+
+ echo dir check start.
+ echo=
+
+ if exist "%~dp0pretrained" (
+ echo dir .\pretrained checked.
+ ) else (
+ echo failed. generating dir .\pretrained.
+ mkdir pretrained
+ )
+ if exist "%~dp0pretrained_v2" (
+ echo dir .\pretrained_v2 checked.
+ ) else (
+ echo failed. generating dir .\pretrained_v2.
+ mkdir pretrained_v2
+ )
+ if exist "%~dp0uvr5_weights" (
+ echo dir .\uvr5_weights checked.
+ ) else (
+ echo failed. generating dir .\uvr5_weights.
+ mkdir uvr5_weights
+ )
+ if exist "%~dp0uvr5_weights\onnx_dereverb_By_FoxJoy" (
+ echo dir .\uvr5_weights\onnx_dereverb_By_FoxJoy checked.
+ ) else (
+ echo failed. generating dir .\uvr5_weights\onnx_dereverb_By_FoxJoy.
+ mkdir uvr5_weights\onnx_dereverb_By_FoxJoy
+ )
+
+ echo=
+ echo dir check finished.
+
+ echo=
+ echo required files check start.
+
+ echo checking D32k.pth
+ if exist "%~dp0pretrained\D32k.pth" (
+ echo D32k.pth in .\pretrained checked.
+ echo=
+ ) else (
+ echo failed. starting download from huggingface.
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d %~dp0pretrained -o D32k.pth
+ if exist "%~dp0pretrained\D32k.pth" (echo download successful.) else (echo please try again!
+ echo=)
+ )
+ echo checking D40k.pth
+ if exist "%~dp0pretrained\D40k.pth" (
+ echo D40k.pth in .\pretrained checked.
+ echo=
+ ) else (
+ echo failed. starting download from huggingface.
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d %~dp0pretrained -o D40k.pth
+ if exist "%~dp0pretrained\D40k.pth" (echo download successful.) else (echo please try again!
+ echo=)
+ )
+ echo checking D40k.pth
+ if exist "%~dp0pretrained_v2\D40k.pth" (
+ echo D40k.pth in .\pretrained_v2 checked.
+ echo=
+ ) else (
+ echo failed. starting download from huggingface.
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d %~dp0pretrained_v2 -o D40k.pth
+ if exist "%~dp0pretrained_v2\D40k.pth" (echo download successful.) else (echo please try again!
+ echo=)
+ )
+ echo checking D48k.pth
+ if exist "%~dp0pretrained\D48k.pth" (
+ echo D48k.pth in .\pretrained checked.
+ echo=
+ ) else (
+ echo failed. starting download from huggingface.
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d %~dp0pretrained -o D48k.pth
+ if exist "%~dp0pretrained\D48k.pth" (echo download successful.) else (echo please try again!
+ echo=)
+ )
+ echo checking G32k.pth
+ if exist "%~dp0pretrained\G32k.pth" (
+ echo G32k.pth in .\pretrained checked.
+ echo=
+ ) else (
+ echo failed. starting download from huggingface.
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d %~dp0pretrained -o G32k.pth
+ if exist "%~dp0pretrained\G32k.pth" (echo download successful.) else (echo please try again!
+ echo=)
+ )
+ echo checking G40k.pth
+ if exist "%~dp0pretrained\G40k.pth" (
+ echo G40k.pth in .\pretrained checked.
+ echo=
+ ) else (
+ echo failed. starting download from huggingface.
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d %~dp0pretrained -o G40k.pth
+ if exist "%~dp0pretrained\G40k.pth" (echo download successful.) else (echo please try again!
+ echo=)
+ )
+ echo checking G40k.pth
+ if exist "%~dp0pretrained_v2\G40k.pth" (
+ echo G40k.pth in .\pretrained_v2 checked.
+ echo=
+ ) else (
+ echo failed. starting download from huggingface.
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d %~dp0pretrained_v2 -o G40k.pth
+ if exist "%~dp0pretrained_v2\G40k.pth" (echo download successful.) else (echo please try again!
+ echo=)
+ )
+ echo checking G48k.pth
+ if exist "%~dp0pretrained\G48k.pth" (
+ echo G48k.pth in .\pretrained checked.
+ echo=
+ ) else (
+ echo failed. starting download from huggingface.
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d %~dp0pretrained -o G48k.pth
+ if exist "%~dp0pretrained\G48k.pth" (echo download successful.) else (echo please try again!
+ echo=)
+ )
+
+ echo checking %d32%
+ if exist "%~dp0pretrained\%d32%" (
+ echo %d32% in .\pretrained checked.
+ echo=
+ ) else (
+ echo failed. starting download from huggingface.
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dld32% -d %~dp0pretrained -o %d32%
+ if exist "%~dp0pretrained\%d32%" (echo download successful.) else (echo please try again!
+ echo=)
+ )
+ echo checking %d40%
+ if exist "%~dp0pretrained\%d40%" (
+ echo %d40% in .\pretrained checked.
+ echo=
+ ) else (
+ echo failed. starting download from huggingface.
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dld40% -d %~dp0pretrained -o %d40%
+ if exist "%~dp0pretrained\%d40%" (echo download successful.) else (echo please try again!
+ echo=)
+ )
+ echo checking %d40v2%
+ if exist "%~dp0pretrained_v2\%d40v2%" (
+ echo %d40v2% in .\pretrained_v2 checked.
+ echo=
+ ) else (
+ echo failed. starting download from huggingface.
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dld40v2% -d %~dp0pretrained_v2 -o %d40v2%
+ if exist "%~dp0pretrained_v2\%d40v2%" (echo download successful.) else (echo please try again!
+ echo=)
+ )
+ echo checking %d48%
+ if exist "%~dp0pretrained\%d48%" (
+ echo %d48% in .\pretrained checked.
+ echo=
+ ) else (
+ echo failed. starting download from huggingface.
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dld48% -d %~dp0pretrained -o %d48%
+ if exist "%~dp0pretrained\%d48%" (echo download successful.) else (echo please try again!
+ echo=)
+ )
+ echo checking %g32%
+ if exist "%~dp0pretrained\%g32%" (
+ echo %g32% in .\pretrained checked.
+ echo=
+ ) else (
+ echo failed. starting download from huggingface.
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlg32% -d %~dp0pretrained -o %g32%
+ if exist "%~dp0pretrained\%g32%" (echo download successful.) else (echo please try again!
+ echo=)
+ )
+ echo checking %g40%
+ if exist "%~dp0pretrained\%g40%" (
+ echo %g40% in .\pretrained checked.
+ echo=
+ ) else (
+ echo failed. starting download from huggingface.
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlg40% -d %~dp0pretrained -o %g40%
+ if exist "%~dp0pretrained\%g40%" (echo download successful.) else (echo please try again!
+ echo=)
+ )
+ echo checking %g40v2%
+ if exist "%~dp0pretrained_v2\%g40v2%" (
+ echo %g40v2% in .\pretrained_v2 checked.
+ echo=
+ ) else (
+ echo failed. starting download from huggingface.
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlg40v2% -d %~dp0pretrained_v2 -o %g40v2%
+ if exist "%~dp0pretrained_v2\%g40v2%" (echo download successful.) else (echo please try again!
+ echo=)
+ )
+ echo checking %g48%
+ if exist "%~dp0pretrained\%g48%" (
+ echo %g48% in .\pretrained checked.
+ echo=
+ ) else (
+ echo failed. starting download from huggingface.
257
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlg48% -d %~dp0\pretrained -o %g48%
258
+ if exist "%~dp0pretrained\%g48%" (echo download successful.) else (echo please try again!
259
+ echo=)
260
+ )
261
+
262
+ echo checking %hp2_all%
263
+ if exist "%~dp0uvr5_weights\%hp2_all%" (
264
+ echo %hp2_all% in .\uvr5_weights checked.
265
+ echo=
266
+ ) else (
267
+ echo failed. starting download from huggingface.
268
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlhp2_all% -d %~dp0\uvr5_weights -o %hp2_all%
269
+ if exist "%~dp0uvr5_weights\%hp2_all%" (echo download successful.) else (echo please try again!
270
+ echo=)
271
+ )
272
+ echo checking %hp3_all%
273
+ if exist "%~dp0uvr5_weights\%hp3_all%" (
274
+ echo %hp3_all% in .\uvr5_weights checked.
275
+ echo=
276
+ ) else (
277
+ echo failed. starting download from huggingface.
278
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlhp3_all% -d %~dp0\uvr5_weights -o %hp3_all%
279
+ if exist "%~dp0uvr5_weights\%hp3_all%" (echo download successful.) else (echo please try again!
280
+ echo=)
281
+ )
282
+ echo checking %hp5_only%
283
+ if exist "%~dp0uvr5_weights\%hp5_only%" (
284
+ echo %hp5_only% in .\uvr5_weights checked.
285
+ echo=
286
+ ) else (
287
+ echo failed. starting download from huggingface.
288
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlhp5_only% -d %~dp0\uvr5_weights -o %hp5_only%
289
+ if exist "%~dp0uvr5_weights\%hp5_only%" (echo download successful.) else (echo please try again!
290
+ echo=)
291
+ )
292
+ echo checking %VR_DeEchoAggressive%
293
+ if exist "%~dp0uvr5_weights\%VR_DeEchoAggressive%" (
294
+ echo %VR_DeEchoAggressive% in .\uvr5_weights checked.
295
+ echo=
296
+ ) else (
297
+ echo failed. starting download from huggingface.
298
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlVR_DeEchoAggressive% -d %~dp0\uvr5_weights -o %VR_DeEchoAggressive%
299
+ if exist "%~dp0uvr5_weights\%VR_DeEchoAggressive%" (echo download successful.) else (echo please try again!
300
+ echo=)
301
+ )
302
+ echo checking %VR_DeEchoDeReverb%
303
+ if exist "%~dp0uvr5_weights\%VR_DeEchoDeReverb%" (
304
+ echo %VR_DeEchoDeReverb% in .\uvr5_weights checked.
305
+ echo=
306
+ ) else (
307
+ echo failed. starting download from huggingface.
308
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlVR_DeEchoDeReverb% -d %~dp0\uvr5_weights -o %VR_DeEchoDeReverb%
309
+ if exist "%~dp0uvr5_weights\%VR_DeEchoDeReverb%" (echo download successful.) else (echo please try again!
310
+ echo=)
311
+ )
312
+ echo checking %VR_DeEchoNormal%
313
+ if exist "%~dp0uvr5_weights\%VR_DeEchoNormal%" (
314
+ echo %VR_DeEchoNormal% in .\uvr5_weights checked.
315
+ echo=
316
+ ) else (
317
+ echo failed. starting download from huggingface.
318
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlVR_DeEchoNormal% -d %~dp0\uvr5_weights -o %VR_DeEchoNormal%
319
+ if exist "%~dp0uvr5_weights\%VR_DeEchoNormal%" (echo download successful.) else (echo please try again!
320
+ echo=)
321
+ )
322
+ echo checking %onnx_dereverb%
323
+ if exist "%~dp0uvr5_weights\onnx_dereverb_By_FoxJoy\%onnx_dereverb%" (
324
+ echo %onnx_dereverb% in .\uvr5_weights\onnx_dereverb_By_FoxJoy checked.
325
+ echo=
326
+ ) else (
327
+ echo failed. starting download from huggingface.
328
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlonnx_dereverb% -d %~dp0\uvr5_weights\onnx_dereverb_By_FoxJoy -o %onnx_dereverb%
329
+ if exist "%~dp0uvr5_weights\onnx_dereverb_By_FoxJoy\%onnx_dereverb%" (echo download successful.) else (echo please try again!
330
+ echo=)
331
+ )
332
+
333
+ echo checking %hb%
334
+ if exist "%~dp0%hb%" (
335
+ echo %hb% in .\pretrained checked.
336
+ echo=
337
+ ) else (
338
+ echo failed. starting download from huggingface.
339
+ %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlhb% -d %~dp0 -o %hb%
340
+ if exist "%~dp0%hb%" (echo download successful.) else (echo please try again!
341
+ echo=)
342
+ )
343
+
344
+ echo required files check finished.
345
+ echo envfiles check complete.
346
+ pause
347
+ :end
348
+ del flag.txt
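
Note: the script above repeats one idiom per file: test whether the file exists, otherwise fetch it with aria2c and test again. A minimal Python sketch of the same check-then-download idiom (not part of this commit; `fetch` is a hypothetical helper, and urllib is single-stream where aria2c uses 16 connections):

    import os
    import urllib.request

    def fetch(url: str, dest_dir: str, name: str) -> None:
        """Download url into dest_dir/name if it is not already present, then re-check."""
        path = os.path.join(dest_dir, name)
        if os.path.exists(path):
            print(f"{name} in {dest_dir} checked.")
            return
        print("failed. starting download from huggingface.")
        os.makedirs(dest_dir, exist_ok=True)
        urllib.request.urlretrieve(url, path)  # aria2c adds resume + parallel segments
        print("download successful." if os.path.exists(path) else "please try again!")

    fetch(
        "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth",
        "pretrained",
        "D32k.pth",
    )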
export_onnx.py ADDED
@@ -0,0 +1,54 @@
+ from infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
+ import torch
+
+ if __name__ == "__main__":
+     MoeVS = True  # whether the model is for MoeVoiceStudio (formerly MoeSS)
+
+     ModelPath = "Shiroha/shiroha.pth"  # model path
+     ExportedPath = "model.onnx"  # output path
+     hidden_channels = 256  # hidden_channels, in preparation for the 768-dim Vec
+     cpt = torch.load(ModelPath, map_location="cpu")
+     cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
+     print(*cpt["config"])
+
+     test_phone = torch.rand(1, 200, hidden_channels)  # hidden units
+     test_phone_lengths = torch.tensor([200]).long()  # hidden-unit length (appears unused)
+     test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # base f0 (unit: Hz)
+     test_pitchf = torch.rand(1, 200)  # NSF base f0
+     test_ds = torch.LongTensor([0])  # speaker ID
+     test_rnd = torch.rand(1, 192, 200)  # noise (adds a random factor)
+
+     device = "cpu"  # device used for export (does not affect how the model is used)
+
+     net_g = SynthesizerTrnMsNSFsidM(
+         *cpt["config"], is_half=False
+     )  # fp32 export (fp16 in C++ would require manually rearranging memory, so fp16 is not used for now)
+     net_g.load_state_dict(cpt["weight"], strict=False)
+     input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
+     output_names = [
+         "audio",
+     ]
+     # net_g.construct_spkmixmap(n_speaker)  # export with a multi-speaker mix track
+     torch.onnx.export(
+         net_g,
+         (
+             test_phone.to(device),
+             test_phone_lengths.to(device),
+             test_pitch.to(device),
+             test_pitchf.to(device),
+             test_ds.to(device),
+             test_rnd.to(device),
+         ),
+         ExportedPath,
+         dynamic_axes={
+             "phone": [1],
+             "pitch": [1],
+             "pitchf": [1],
+             "rnd": [2],
+         },
+         do_constant_folding=False,
+         opset_version=16,
+         verbose=False,
+         input_names=input_names,
+         output_names=output_names,
+     )
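
Note: a hedged sketch (not part of this commit) of consuming the exported graph with onnxruntime. The input names, dtypes, and shapes mirror the dummy tensors in the export script above; the time axes are dynamic, and `hidden_channels` would differ for a 768-dim model:

    import numpy as np
    import onnxruntime as ort

    sess = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
    n_frames, hidden_channels = 200, 256  # feature dim must match the exported model
    audio = sess.run(
        ["audio"],
        {
            "phone": np.random.rand(1, n_frames, hidden_channels).astype(np.float32),
            "phone_lengths": np.array([n_frames], dtype=np.int64),
            "pitch": np.random.randint(5, 255, (1, n_frames), dtype=np.int64),
            "pitchf": np.random.rand(1, n_frames).astype(np.float32),
            "ds": np.array([0], dtype=np.int64),
            "rnd": np.random.rand(1, 192, n_frames).astype(np.float32),
        },
    )[0]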
extract_f0_print.py ADDED
@@ -0,0 +1,160 @@
+ import os, traceback, sys, parselmouth
+
+ now_dir = os.getcwd()
+ sys.path.append(now_dir)
+ from my_utils import load_audio
+ import pyworld
+ from scipy.io import wavfile
+ import numpy as np, logging
+
+ logging.getLogger("numba").setLevel(logging.WARNING)
+ from multiprocessing import Process
+
+ exp_dir = sys.argv[1]
+ f = open("%s/extract_f0_feature.log" % exp_dir, "a+")
+
+
+ def printt(strr):
+     print(strr)
+     f.write("%s\n" % strr)
+     f.flush()
+
+
+ n_p = int(sys.argv[2])
+ f0method = sys.argv[3]
+
+
+ class FeatureInput(object):
+     def __init__(self, samplerate=16000, hop_size=160):
+         self.fs = samplerate
+         self.hop = hop_size
+
+         self.f0_bin = 256
+         self.f0_max = 1100.0
+         self.f0_min = 50.0
+         self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
+         self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
+
+     def compute_f0(self, path, f0_method):
+         x = load_audio(path, self.fs)
+         p_len = x.shape[0] // self.hop
+         if f0_method == "pm":
+             time_step = 160 / 16000 * 1000
+             f0_min = 50
+             f0_max = 1100
+             f0 = (
+                 parselmouth.Sound(x, self.fs)
+                 .to_pitch_ac(
+                     time_step=time_step / 1000,
+                     voicing_threshold=0.6,
+                     pitch_floor=f0_min,
+                     pitch_ceiling=f0_max,
+                 )
+                 .selected_array["frequency"]
+             )
+             pad_size = (p_len - len(f0) + 1) // 2
+             if pad_size > 0 or p_len - len(f0) - pad_size > 0:
+                 f0 = np.pad(
+                     f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
+                 )
+         elif f0_method == "harvest":
+             f0, t = pyworld.harvest(
+                 x.astype(np.double),
+                 fs=self.fs,
+                 f0_ceil=self.f0_max,
+                 f0_floor=self.f0_min,
+                 frame_period=1000 * self.hop / self.fs,
+             )
+             f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.fs)
+         elif f0_method == "dio":
+             f0, t = pyworld.dio(
+                 x.astype(np.double),
+                 fs=self.fs,
+                 f0_ceil=self.f0_max,
+                 f0_floor=self.f0_min,
+                 frame_period=1000 * self.hop / self.fs,
+             )
+             f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.fs)
+         return f0
+
+     def coarse_f0(self, f0):
+         f0_mel = 1127 * np.log(1 + f0 / 700)
+         f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * (
+             self.f0_bin - 2
+         ) / (self.f0_mel_max - self.f0_mel_min) + 1
+
+         # use 0 or 1
+         f0_mel[f0_mel <= 1] = 1
+         f0_mel[f0_mel > self.f0_bin - 1] = self.f0_bin - 1
+         f0_coarse = np.rint(f0_mel).astype(int)
+         assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, (
+             f0_coarse.max(),
+             f0_coarse.min(),
+         )
+         return f0_coarse
+
+     def go(self, paths, f0_method):
+         if len(paths) == 0:
+             printt("no-f0-todo")
+         else:
+             printt("todo-f0-%s" % len(paths))
+             n = max(len(paths) // 5, 1)  # each process prints at most 5 progress lines
+             for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths):
+                 try:
+                     if idx % n == 0:
+                         printt("f0ing,now-%s,all-%s,-%s" % (idx, len(paths), inp_path))
+                     if (
+                         os.path.exists(opt_path1 + ".npy") == True
+                         and os.path.exists(opt_path2 + ".npy") == True
+                     ):
+                         continue
+                     featur_pit = self.compute_f0(inp_path, f0_method)
+                     np.save(
+                         opt_path2,
+                         featur_pit,
+                         allow_pickle=False,
+                     )  # nsf
+                     coarse_pit = self.coarse_f0(featur_pit)
+                     np.save(
+                         opt_path1,
+                         coarse_pit,
+                         allow_pickle=False,
+                     )  # ori
+                 except:
+                     printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc()))
+
+
+ if __name__ == "__main__":
+     # exp_dir=r"E:\codes\py39\dataset\mi-test"
+     # n_p=16
+     # f = open("%s/log_extract_f0.log"%exp_dir, "w")
+     printt(sys.argv)
+     featureInput = FeatureInput()
+     paths = []
+     inp_root = "%s/1_16k_wavs" % (exp_dir)
+     opt_root1 = "%s/2a_f0" % (exp_dir)
+     opt_root2 = "%s/2b-f0nsf" % (exp_dir)
+
+     os.makedirs(opt_root1, exist_ok=True)
+     os.makedirs(opt_root2, exist_ok=True)
+     for name in sorted(list(os.listdir(inp_root))):
+         inp_path = "%s/%s" % (inp_root, name)
+         if "spec" in inp_path:
+             continue
+         opt_path1 = "%s/%s" % (opt_root1, name)
+         opt_path2 = "%s/%s" % (opt_root2, name)
+         paths.append([inp_path, opt_path1, opt_path2])
+
+     ps = []
+     for i in range(n_p):
+         p = Process(
+             target=featureInput.go,
+             args=(
+                 paths[i::n_p],
+                 f0method,
+             ),
+         )
+         ps.append(p)
+         p.start()
+     for i in range(n_p):
+         ps[i].join()
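
Note: the script is driven entirely by positional arguments, and `coarse_f0` quantizes Hz values onto a mel scale in integer bins 1..255. An illustrative invocation and a worked example of the bin mapping, using the constants defined above (values here are approximate, and the experiment directory name is hypothetical):

    # python extract_f0_print.py logs/my-experiment 4 harvest
    #   sys.argv[1] = experiment dir containing 1_16k_wavs/
    #   sys.argv[2] = number of worker processes
    #   sys.argv[3] = f0 method: "pm", "harvest" or "dio"

    import numpy as np

    f0 = np.array([0.0, 50.0, 440.0, 1100.0])          # Hz per frame
    f0_mel = 1127 * np.log(1 + f0 / 700)
    f0_mel_min = 1127 * np.log(1 + 50 / 700)           # ~77.8
    f0_mel_max = 1127 * np.log(1 + 1100 / 700)         # ~1064.4
    f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (
        f0_mel_max - f0_mel_min
    ) + 1
    f0_mel = np.clip(f0_mel, 1, 255)
    print(np.rint(f0_mel).astype(int))  # unvoiced frames (0 Hz) land in bin 1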
extract_feature_print.py ADDED
@@ -0,0 +1,123 @@
+ import os, sys, traceback
+
+ # device=sys.argv[1]
+ n_part = int(sys.argv[2])
+ i_part = int(sys.argv[3])
+ if len(sys.argv) == 6:  # argv: script, device, n_part, i_part, exp_dir, version
+     exp_dir = sys.argv[4]
+     version = sys.argv[5]
+ else:
+     i_gpu = sys.argv[4]
+     exp_dir = sys.argv[5]
+     os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu)
+     version = sys.argv[6]
+ import torch
+ import torch.nn.functional as F
+ import soundfile as sf
+ import numpy as np
+ from fairseq import checkpoint_utils
+
+ if torch.cuda.is_available():
+     device = "cuda"
+ elif torch.backends.mps.is_available():
+     device = "mps"
+ else:
+     device = "cpu"
+
+ f = open("%s/extract_f0_feature.log" % exp_dir, "a+")
+
+
+ def printt(strr):
+     print(strr)
+     f.write("%s\n" % strr)
+     f.flush()
+
+
+ printt(sys.argv)
+ model_path = "hubert_base.pt"
+
+ printt(exp_dir)
+ wavPath = "%s/1_16k_wavs" % exp_dir
+ outPath = (
+     "%s/3_feature256" % exp_dir if version == "v1" else "%s/3_feature768" % exp_dir
+ )
+ os.makedirs(outPath, exist_ok=True)
+
+
+ # wave must be 16k, hop_size=320
+ def readwave(wav_path, normalize=False):
+     wav, sr = sf.read(wav_path)
+     assert sr == 16000
+     feats = torch.from_numpy(wav).float()
+     if feats.dim() == 2:  # stereo: average the two channels
+         feats = feats.mean(-1)
+     assert feats.dim() == 1, feats.dim()
+     if normalize:
+         with torch.no_grad():
+             feats = F.layer_norm(feats, feats.shape)
+     feats = feats.view(1, -1)
+     return feats
+
+
+ # HuBERT model
+ printt("load model(s) from {}".format(model_path))
+ # bail out if the hubert model does not exist
+ if os.access(model_path, os.F_OK) == False:
+     printt(
+         "Error: Extracting is shut down because %s does not exist, you may download it from https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main"
+         % model_path
+     )
+     exit(0)
+ models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
+     [model_path],
+     suffix="",
+ )
+ model = models[0]
+ model = model.to(device)
+ printt("move model to %s" % device)
+ if device not in ["mps", "cpu"]:
+     model = model.half()
+ model.eval()
+
+ todo = sorted(list(os.listdir(wavPath)))[i_part::n_part]
+ n = max(1, len(todo) // 10)  # print at most ten progress lines
+ if len(todo) == 0:
+     printt("no-feature-todo")
+ else:
+     printt("all-feature-%s" % len(todo))
+     for idx, file in enumerate(todo):
+         try:
+             if file.endswith(".wav"):
+                 wav_path = "%s/%s" % (wavPath, file)
+                 out_path = "%s/%s" % (outPath, file.replace("wav", "npy"))
+
+                 if os.path.exists(out_path):
+                     continue
+
+                 feats = readwave(wav_path, normalize=saved_cfg.task.normalize)
+                 padding_mask = torch.BoolTensor(feats.shape).fill_(False)
+                 inputs = {
+                     "source": feats.half().to(device)
+                     if device not in ["mps", "cpu"]
+                     else feats.to(device),
+                     "padding_mask": padding_mask.to(device),
+                     "output_layer": 9 if version == "v1" else 12,  # layer 9 for v1, 12 for v2
+                 }
+                 with torch.no_grad():
+                     logits = model.extract_features(**inputs)
+                     feats = (
+                         model.final_proj(logits[0]) if version == "v1" else logits[0]
+                     )
+
+                 feats = feats.squeeze(0).float().cpu().numpy()
+                 if np.isnan(feats).sum() == 0:
+                     np.save(out_path, feats, allow_pickle=False)
+                 else:
+                     printt("%s-contains nan" % file)
+                 if idx % n == 0:
+                     printt("now-%s,all-%s,%s,%s" % (idx, len(todo), file, feats.shape))
+         except:
+             printt(traceback.format_exc())
+     printt("all-feature-done")
extract_locale.py ADDED
@@ -0,0 +1,31 @@
+ import json
+ import re
+
+ # Define regular expression patterns
+ pattern = r"""i18n\([\s\n\t]*(["'][^"']+["'])[\s\n\t]*\)"""
+
+ # Initialize the dictionary to store key-value pairs
+ data = {}
+
+
+ def process(fn: str):
+     global data
+     with open(fn, "r", encoding="utf-8") as f:
+         contents = f.read()
+         matches = re.findall(pattern, contents)
+         for key in matches:
+             key = eval(key)
+             print("extract:", key)
+             data[key] = key
+
+
+ print("processing infer-web.py")
+ process("infer-web.py")
+
+ print("processing gui.py")
+ process("gui.py")
+
+ # Save as a JSON file
+ with open("./i18n/zh_CN.json", "w", encoding="utf-8") as f:
+     json.dump(data, f, ensure_ascii=False, indent=4)
+     f.write("\n")
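
Note: an illustrative round trip for the regex above (the source line is hypothetical, though the key appears in gui.py). The capture keeps the quotes, so `eval` is used to strip them:

    import json
    import re

    pattern = r"""i18n\([\s\n\t]*(["'][^"']+["'])[\s\n\t]*\)"""
    source = 'button = sg.Button(i18n("开始音频转换"))'  # hypothetical line from gui.py
    matches = re.findall(pattern, source)
    key = eval(matches[0])  # '"开始音频转换"' -> 开始音频转换
    print(json.dumps({key: key}, ensure_ascii=False, indent=4))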
ffmpeg ADDED
Binary file (302 kB).
go-realtime-gui.bat ADDED
@@ -0,0 +1,2 @@
+ runtime\python.exe gui.py
+ pause
go-web.bat ADDED
@@ -0,0 +1,2 @@
+ runtime\python.exe infer-web.py --pycmd runtime\python.exe --port 7897
+ pause
gui.py ADDED
@@ -0,0 +1,698 @@
+ """
+ Updates after 0416:
+     use the half setting from config
+     rebuild the npy instead of asking the user to fill it in
+     v2 support
+     support for models without f0
+     fixes
+
+ int16:
+     added support for running without an index
+     f0 algorithm changed to harvest (by all appearances the only thing that affects CPU usage), but quality suffers without this change
+ """
+ import os, sys, traceback, re
+
+ import json
+
+ now_dir = os.getcwd()
+ sys.path.append(now_dir)
+ from config import Config
+
+ Config = Config()
+ import PySimpleGUI as sg
+ import sounddevice as sd
+ import noisereduce as nr
+ import numpy as np
+ from fairseq import checkpoint_utils
+ import librosa, torch, pyworld, faiss, time, threading
+ import torch.nn.functional as F
+ import torchaudio.transforms as tat
+ import scipy.signal as signal
+
+
+ # import matplotlib.pyplot as plt
+ from infer_pack.models import (
+     SynthesizerTrnMs256NSFsid,
+     SynthesizerTrnMs256NSFsid_nono,
+     SynthesizerTrnMs768NSFsid,
+     SynthesizerTrnMs768NSFsid_nono,
+ )
+ from i18n import I18nAuto
+
+ i18n = I18nAuto()
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ current_dir = os.getcwd()
+
+
+ class RVC:
+     def __init__(
+         self, key, hubert_path, pth_path, index_path, npy_path, index_rate
+     ) -> None:
+         """
+         Initialization
+         """
+         try:
+             self.f0_up_key = key
+             self.time_step = 160 / 16000 * 1000
+             self.f0_min = 50
+             self.f0_max = 1100
+             self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
+             self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
+             self.sr = 16000
+             self.window = 160
+             if index_rate != 0:
+                 self.index = faiss.read_index(index_path)
+                 # self.big_npy = np.load(npy_path)
+                 self.big_npy = self.index.reconstruct_n(0, self.index.ntotal)
+                 print("index search enabled")
+             self.index_rate = index_rate
+             model_path = hubert_path
+             print("load model(s) from {}".format(model_path))
+             models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
+                 [model_path],
+                 suffix="",
+             )
+             self.model = models[0]
+             self.model = self.model.to(device)
+             if Config.is_half:
+                 self.model = self.model.half()
+             else:
+                 self.model = self.model.float()
+             self.model.eval()
+             cpt = torch.load(pth_path, map_location="cpu")
+             self.tgt_sr = cpt["config"][-1]
+             cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
+             self.if_f0 = cpt.get("f0", 1)
+             self.version = cpt.get("version", "v1")
+             if self.version == "v1":
+                 if self.if_f0 == 1:
+                     self.net_g = SynthesizerTrnMs256NSFsid(
+                         *cpt["config"], is_half=Config.is_half
+                     )
+                 else:
+                     self.net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
+             elif self.version == "v2":
+                 if self.if_f0 == 1:
+                     self.net_g = SynthesizerTrnMs768NSFsid(
+                         *cpt["config"], is_half=Config.is_half
+                     )
+                 else:
+                     self.net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
+             del self.net_g.enc_q
+             print(self.net_g.load_state_dict(cpt["weight"], strict=False))
+             self.net_g.eval().to(device)
+             if Config.is_half:
+                 self.net_g = self.net_g.half()
+             else:
+                 self.net_g = self.net_g.float()
+         except:
+             print(traceback.format_exc())
+
+     def get_f0(self, x, f0_up_key, inp_f0=None):
+         x_pad = 1
+         f0_min = 50
+         f0_max = 1100
+         f0_mel_min = 1127 * np.log(1 + f0_min / 700)
+         f0_mel_max = 1127 * np.log(1 + f0_max / 700)
+         f0, t = pyworld.harvest(
+             x.astype(np.double),
+             fs=self.sr,
+             f0_ceil=f0_max,
+             f0_floor=f0_min,
+             frame_period=10,
+         )
+         f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr)
+         f0 = signal.medfilt(f0, 3)
+         f0 *= pow(2, f0_up_key / 12)
+         # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
+         tf0 = self.sr // self.window  # number of f0 points per second
+         if inp_f0 is not None:
+             delta_t = np.round(
+                 (inp_f0[:, 0].max() - inp_f0[:, 0].min()) * tf0 + 1
+             ).astype("int16")
+             replace_f0 = np.interp(
+                 list(range(delta_t)), inp_f0[:, 0] * 100, inp_f0[:, 1]
+             )
+             shape = f0[x_pad * tf0 : x_pad * tf0 + len(replace_f0)].shape[0]
+             f0[x_pad * tf0 : x_pad * tf0 + len(replace_f0)] = replace_f0[:shape]
+         # with open("test_opt.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
+         f0bak = f0.copy()
+         f0_mel = 1127 * np.log(1 + f0 / 700)
+         f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (
+             f0_mel_max - f0_mel_min
+         ) + 1
+         f0_mel[f0_mel <= 1] = 1
+         f0_mel[f0_mel > 255] = 255
+         f0_coarse = np.rint(f0_mel).astype(int)  # np.int is deprecated in newer numpy
+         return f0_coarse, f0bak  # 1-0
+
+     def infer(self, feats: torch.Tensor) -> np.ndarray:
+         """
+         Inference function
+         """
+         audio = feats.clone().cpu().numpy()
+         assert feats.dim() == 1, feats.dim()
+         feats = feats.view(1, -1)
+         padding_mask = torch.BoolTensor(feats.shape).fill_(False)
+         if Config.is_half:
+             feats = feats.half()
+         else:
+             feats = feats.float()
+         inputs = {
+             "source": feats.to(device),
+             "padding_mask": padding_mask.to(device),
+             "output_layer": 9 if self.version == "v1" else 12,
+         }
+         torch.cuda.synchronize()
+         with torch.no_grad():
+             logits = self.model.extract_features(**inputs)
+             feats = (
+                 self.model.final_proj(logits[0]) if self.version == "v1" else logits[0]
+             )
+
+         #### index-based retrieval
+         try:
+             if (
+                 hasattr(self, "index")
+                 and hasattr(self, "big_npy")
+                 and self.index_rate != 0
+             ):
+                 npy = feats[0].cpu().numpy().astype("float32")
+                 score, ix = self.index.search(npy, k=8)
+                 weight = np.square(1 / score)
+                 weight /= weight.sum(axis=1, keepdims=True)
+                 npy = np.sum(self.big_npy[ix] * np.expand_dims(weight, axis=2), axis=1)
+                 if Config.is_half:
+                     npy = npy.astype("float16")
+                 feats = (
+                     torch.from_numpy(npy).unsqueeze(0).to(device) * self.index_rate
+                     + (1 - self.index_rate) * feats
+                 )
+             else:
+                 print("index search FAIL or disabled")
+         except:
+             traceback.print_exc()
+             print("index search FAIL")
+         feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
+         torch.cuda.synchronize()
+         print(feats.shape)
+         if self.if_f0 == 1:
+             pitch, pitchf = self.get_f0(audio, self.f0_up_key)
+             p_len = min(feats.shape[1], 13000, pitch.shape[0])  # larger values blow up GPU memory
+         else:
+             pitch, pitchf = None, None
+             p_len = min(feats.shape[1], 13000)  # larger values blow up GPU memory
+         torch.cuda.synchronize()
+         # print(feats.shape,pitch.shape)
+         feats = feats[:, :p_len, :]
+         if self.if_f0 == 1:
+             pitch = pitch[:p_len]
+             pitchf = pitchf[:p_len]
+             pitch = torch.LongTensor(pitch).unsqueeze(0).to(device)
+             pitchf = torch.FloatTensor(pitchf).unsqueeze(0).to(device)
+         p_len = torch.LongTensor([p_len]).to(device)
+         ii = 0  # sid
+         sid = torch.LongTensor([ii]).to(device)
+         with torch.no_grad():
+             if self.if_f0 == 1:
+                 infered_audio = (
+                     self.net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]
+                     .data.cpu()
+                     .float()
+                 )
+             else:
+                 infered_audio = (
+                     self.net_g.infer(feats, p_len, sid)[0][0, 0].data.cpu().float()
+                 )
+         torch.cuda.synchronize()
+         return infered_audio
+
+
+ class GUIConfig:
+     def __init__(self) -> None:
+         self.hubert_path: str = ""
+         self.pth_path: str = ""
+         self.index_path: str = ""
+         self.npy_path: str = ""
+         self.pitch: int = 12
+         self.samplerate: int = 44100
+         self.block_time: float = 1.0  # s
+         self.buffer_num: int = 1
+         self.threhold: int = -30
+         self.crossfade_time: float = 0.08
+         self.extra_time: float = 0.04
+         self.I_noise_reduce = False
+         self.O_noise_reduce = False
+         self.index_rate = 0.3
+
+
+ class GUI:
+     def __init__(self) -> None:
+         self.config = GUIConfig()
+         self.flag_vc = False
+
+         self.launcher()
+
+     def load(self):
+         input_devices, output_devices, _, _ = self.get_devices()
+         try:
+             with open("values1.json", "r") as j:
+                 data = json.load(j)
+         except:
+             with open("values1.json", "w") as j:
+                 data = {
+                     "pth_path": " ",
+                     "index_path": " ",
+                     "sg_input_device": input_devices[sd.default.device[0]],
+                     "sg_output_device": output_devices[sd.default.device[1]],
+                     "threhold": "-45",
+                     "pitch": "0",
+                     "index_rate": "0",
+                     "block_time": "1",
+                     "crossfade_length": "0.04",
+                     "extra_time": "1",
+                 }
+         return data
+
+     def launcher(self):
+         data = self.load()
+         sg.theme("LightBlue3")
+         input_devices, output_devices, _, _ = self.get_devices()
+         layout = [
+             [
+                 sg.Frame(
+                     title=i18n("加载模型"),
+                     layout=[
+                         [
+                             sg.Input(
+                                 default_text="hubert_base.pt",
+                                 key="hubert_path",
+                                 disabled=True,
+                             ),
+                             sg.FileBrowse(
+                                 i18n("Hubert模型"),
+                                 initial_folder=os.path.join(os.getcwd()),
+                                 file_types=((". pt"),),
+                             ),
+                         ],
+                         [
+                             sg.Input(
+                                 default_text=data.get("pth_path", ""),
+                                 key="pth_path",
+                             ),
+                             sg.FileBrowse(
+                                 i18n("选择.pth文件"),
+                                 initial_folder=os.path.join(os.getcwd(), "weights"),
+                                 file_types=((". pth"),),
+                             ),
+                         ],
+                         [
+                             sg.Input(
+                                 default_text=data.get("index_path", ""),
+                                 key="index_path",
+                             ),
+                             sg.FileBrowse(
+                                 i18n("选择.index文件"),
+                                 initial_folder=os.path.join(os.getcwd(), "logs"),
+                                 file_types=((". index"),),
+                             ),
+                         ],
+                         [
+                             sg.Input(
+                                 default_text="你不需要填写这个You don't need write this.",
+                                 key="npy_path",
+                                 disabled=True,
+                             ),
+                             sg.FileBrowse(
+                                 i18n("选择.npy文件"),
+                                 initial_folder=os.path.join(os.getcwd(), "logs"),
+                                 file_types=((". npy"),),
+                             ),
+                         ],
+                     ],
+                 )
+             ],
+             [
+                 sg.Frame(
+                     layout=[
+                         [
+                             sg.Text(i18n("输入设备")),
+                             sg.Combo(
+                                 input_devices,
+                                 key="sg_input_device",
+                                 default_value=data.get("sg_input_device", ""),
+                             ),
+                         ],
+                         [
+                             sg.Text(i18n("输出设备")),
+                             sg.Combo(
+                                 output_devices,
+                                 key="sg_output_device",
+                                 default_value=data.get("sg_output_device", ""),
+                             ),
+                         ],
+                     ],
+                     title=i18n("音频设备(请使用同种类驱动)"),
+                 )
+             ],
+             [
+                 sg.Frame(
+                     layout=[
+                         [
+                             sg.Text(i18n("响应阈值")),
+                             sg.Slider(
+                                 range=(-60, 0),
+                                 key="threhold",
+                                 resolution=1,
+                                 orientation="h",
+                                 default_value=data.get("threhold", ""),
+                             ),
+                         ],
+                         [
+                             sg.Text(i18n("音调设置")),
+                             sg.Slider(
+                                 range=(-24, 24),
+                                 key="pitch",
+                                 resolution=1,
+                                 orientation="h",
+                                 default_value=data.get("pitch", ""),
+                             ),
+                         ],
+                         [
+                             sg.Text(i18n("Index Rate")),
+                             sg.Slider(
+                                 range=(0.0, 1.0),
+                                 key="index_rate",
+                                 resolution=0.01,
+                                 orientation="h",
+                                 default_value=data.get("index_rate", ""),
+                             ),
+                         ],
+                     ],
+                     title=i18n("常规设置"),
+                 ),
+                 sg.Frame(
+                     layout=[
+                         [
+                             sg.Text(i18n("采样长度")),
+                             sg.Slider(
+                                 range=(0.1, 3.0),
+                                 key="block_time",
+                                 resolution=0.1,
+                                 orientation="h",
+                                 default_value=data.get("block_time", ""),
+                             ),
+                         ],
+                         [
+                             sg.Text(i18n("淡入淡出长度")),
+                             sg.Slider(
+                                 range=(0.01, 0.15),
+                                 key="crossfade_length",
+                                 resolution=0.01,
+                                 orientation="h",
+                                 default_value=data.get("crossfade_length", ""),
+                             ),
+                         ],
+                         [
+                             sg.Text(i18n("额外推理时长")),
+                             sg.Slider(
+                                 range=(0.05, 3.00),
+                                 key="extra_time",
+                                 resolution=0.01,
+                                 orientation="h",
+                                 default_value=data.get("extra_time", ""),
+                             ),
+                         ],
+                         [
+                             sg.Checkbox(i18n("输入降噪"), key="I_noise_reduce"),
+                             sg.Checkbox(i18n("输出降噪"), key="O_noise_reduce"),
+                         ],
+                     ],
+                     title=i18n("性能设置"),
+                 ),
+             ],
+             [
+                 sg.Button(i18n("开始音频转换"), key="start_vc"),
+                 sg.Button(i18n("停止音频转换"), key="stop_vc"),
+                 sg.Text(i18n("推理时间(ms):")),
+                 sg.Text("0", key="infer_time"),
+             ],
+         ]
+         self.window = sg.Window("RVC - GUI", layout=layout)
+         self.event_handler()
+
+     def event_handler(self):
+         while True:
+             event, values = self.window.read()
+             if event == sg.WINDOW_CLOSED:
+                 self.flag_vc = False
+                 exit()
+             if event == "start_vc" and self.flag_vc == False:
+                 if self.set_values(values) == True:
+                     print("using_cuda:" + str(torch.cuda.is_available()))
+                     self.start_vc()
+                     settings = {
+                         "pth_path": values["pth_path"],
+                         "index_path": values["index_path"],
+                         "sg_input_device": values["sg_input_device"],
+                         "sg_output_device": values["sg_output_device"],
+                         "threhold": values["threhold"],
+                         "pitch": values["pitch"],
+                         "index_rate": values["index_rate"],
+                         "block_time": values["block_time"],
+                         "crossfade_length": values["crossfade_length"],
+                         "extra_time": values["extra_time"],
+                     }
+                     with open("values1.json", "w") as j:
+                         json.dump(settings, j)
+             if event == "stop_vc" and self.flag_vc == True:
+                 self.flag_vc = False
+
+     def set_values(self, values):
+         if len(values["pth_path"].strip()) == 0:
+             sg.popup(i18n("请选择pth文件"))
+             return False
+         if len(values["index_path"].strip()) == 0:
+             sg.popup(i18n("请选择index文件"))
+             return False
+         pattern = re.compile("[^\x00-\x7F]+")
+         if pattern.findall(values["hubert_path"]):
+             sg.popup(i18n("hubert模型路径不可包含中文"))
+             return False
+         if pattern.findall(values["pth_path"]):
+             sg.popup(i18n("pth文件路径不可包含中文"))
+             return False
+         if pattern.findall(values["index_path"]):
+             sg.popup(i18n("index文件路径不可包含中文"))
+             return False
+         self.set_devices(values["sg_input_device"], values["sg_output_device"])
+         self.config.hubert_path = os.path.join(current_dir, "hubert_base.pt")
+         self.config.pth_path = values["pth_path"]
+         self.config.index_path = values["index_path"]
+         self.config.npy_path = values["npy_path"]
+         self.config.threhold = values["threhold"]
+         self.config.pitch = values["pitch"]
+         self.config.block_time = values["block_time"]
+         self.config.crossfade_time = values["crossfade_length"]
+         self.config.extra_time = values["extra_time"]
+         self.config.I_noise_reduce = values["I_noise_reduce"]
+         self.config.O_noise_reduce = values["O_noise_reduce"]
+         self.config.index_rate = values["index_rate"]
+         return True
+
+     def start_vc(self):
+         torch.cuda.empty_cache()
+         self.flag_vc = True
+         self.block_frame = int(self.config.block_time * self.config.samplerate)
+         self.crossfade_frame = int(self.config.crossfade_time * self.config.samplerate)
+         self.sola_search_frame = int(0.012 * self.config.samplerate)
+         self.delay_frame = int(0.01 * self.config.samplerate)  # reserve 0.01 s of look-ahead
+         self.extra_frame = int(self.config.extra_time * self.config.samplerate)
+         self.rvc = None
+         self.rvc = RVC(
+             self.config.pitch,
+             self.config.hubert_path,
+             self.config.pth_path,
+             self.config.index_path,
+             self.config.npy_path,
+             self.config.index_rate,
+         )
+         self.input_wav: np.ndarray = np.zeros(
+             self.extra_frame
+             + self.crossfade_frame
+             + self.sola_search_frame
+             + self.block_frame,
+             dtype="float32",
+         )
+         self.output_wav: torch.Tensor = torch.zeros(
+             self.block_frame, device=device, dtype=torch.float32
+         )
+         self.sola_buffer: torch.Tensor = torch.zeros(
+             self.crossfade_frame, device=device, dtype=torch.float32
+         )
+         self.fade_in_window: torch.Tensor = torch.linspace(
+             0.0, 1.0, steps=self.crossfade_frame, device=device, dtype=torch.float32
+         )
+         self.fade_out_window: torch.Tensor = 1 - self.fade_in_window
+         self.resampler1 = tat.Resample(
+             orig_freq=self.config.samplerate, new_freq=16000, dtype=torch.float32
+         )
+         self.resampler2 = tat.Resample(
+             orig_freq=self.rvc.tgt_sr,
+             new_freq=self.config.samplerate,
+             dtype=torch.float32,
+         )
+         thread_vc = threading.Thread(target=self.soundinput)
+         thread_vc.start()
+
+     def soundinput(self):
+         """
+         Receive audio input
+         """
+         with sd.Stream(
+             callback=self.audio_callback,
+             blocksize=self.block_frame,
+             samplerate=self.config.samplerate,
+             dtype="float32",
+         ):
+             while self.flag_vc:
+                 time.sleep(self.config.block_time)
+                 print("Audio block passed.")
+         print("ENDing VC")
+
+     def audio_callback(
+         self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
+     ):
+         """
+         Audio processing
+         """
+         start_time = time.perf_counter()
+         indata = librosa.to_mono(indata.T)
+         if self.config.I_noise_reduce:
+             indata[:] = nr.reduce_noise(y=indata, sr=self.config.samplerate)
+
+         """noise gate"""
+         frame_length = 2048
+         hop_length = 1024
+         rms = librosa.feature.rms(
+             y=indata, frame_length=frame_length, hop_length=hop_length
+         )
+         db_threhold = librosa.amplitude_to_db(rms, ref=1.0)[0] < self.config.threhold
+         # print(rms.shape,db.shape,db)
+         for i in range(db_threhold.shape[0]):
+             if db_threhold[i]:
+                 indata[i * hop_length : (i + 1) * hop_length] = 0
+         self.input_wav[:] = np.append(self.input_wav[self.block_frame :], indata)
+
+         # infer
+         print("input_wav:" + str(self.input_wav.shape))
+         # print('infered_wav:'+str(infer_wav.shape))
+         infer_wav: torch.Tensor = self.resampler2(
+             self.rvc.infer(self.resampler1(torch.from_numpy(self.input_wav)))
+         )[-self.crossfade_frame - self.sola_search_frame - self.block_frame :].to(
+             device
+         )
+         print("infer_wav:" + str(infer_wav.shape))
+
+         # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC
+         cor_nom = F.conv1d(
+             infer_wav[None, None, : self.crossfade_frame + self.sola_search_frame],
+             self.sola_buffer[None, None, :],
+         )
+         cor_den = torch.sqrt(
+             F.conv1d(
+                 infer_wav[None, None, : self.crossfade_frame + self.sola_search_frame]
+                 ** 2,
+                 torch.ones(1, 1, self.crossfade_frame, device=device),
+             )
+             + 1e-8
+         )
+         sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0])
+         print("sola offset: " + str(int(sola_offset)))
+
+         # crossfade
+         self.output_wav[:] = infer_wav[sola_offset : sola_offset + self.block_frame]
+         self.output_wav[: self.crossfade_frame] *= self.fade_in_window
+         self.output_wav[: self.crossfade_frame] += self.sola_buffer[:]
+         if sola_offset < self.sola_search_frame:
+             self.sola_buffer[:] = (
+                 infer_wav[
+                     -self.sola_search_frame
+                     - self.crossfade_frame
+                     + sola_offset : -self.sola_search_frame
+                     + sola_offset
+                 ]
+                 * self.fade_out_window
+             )
+         else:
+             self.sola_buffer[:] = (
+                 infer_wav[-self.crossfade_frame :] * self.fade_out_window
+             )
+
+         if self.config.O_noise_reduce:
+             outdata[:] = np.tile(
+                 nr.reduce_noise(
+                     y=self.output_wav[:].cpu().numpy(), sr=self.config.samplerate
+                 ),
+                 (2, 1),
+             ).T
+         else:
+             outdata[:] = self.output_wav[:].repeat(2, 1).t().cpu().numpy()
+         total_time = time.perf_counter() - start_time
+         self.window["infer_time"].update(int(total_time * 1000))
+         print("infer time:" + str(total_time))
+
+     def get_devices(self, update: bool = True):
+         """Get the list of audio devices"""
+         if update:
+             sd._terminate()
+             sd._initialize()
+         devices = sd.query_devices()
+         hostapis = sd.query_hostapis()
+         for hostapi in hostapis:
+             for device_idx in hostapi["devices"]:
+                 devices[device_idx]["hostapi_name"] = hostapi["name"]
+         input_devices = [
+             f"{d['name']} ({d['hostapi_name']})"
+             for d in devices
+             if d["max_input_channels"] > 0
+         ]
+         output_devices = [
+             f"{d['name']} ({d['hostapi_name']})"
+             for d in devices
+             if d["max_output_channels"] > 0
+         ]
+         input_devices_indices = [
+             d["index"] if "index" in d else d["name"]
+             for d in devices
+             if d["max_input_channels"] > 0
+         ]
+         output_devices_indices = [
+             d["index"] if "index" in d else d["name"]
+             for d in devices
+             if d["max_output_channels"] > 0
+         ]
+         return (
+             input_devices,
+             output_devices,
+             input_devices_indices,
+             output_devices_indices,
+         )
+
+     def set_devices(self, input_device, output_device):
+         """Set the input and output devices"""
+         (
+             input_devices,
+             output_devices,
+             input_device_indices,
+             output_device_indices,
+         ) = self.get_devices()
+         sd.default.device[0] = input_device_indices[input_devices.index(input_device)]
+         sd.default.device[1] = output_device_indices[
+             output_devices.index(output_device)
+         ]
+         print("input device:" + str(sd.default.device[0]) + ":" + str(input_device))
+         print("output device:" + str(sd.default.device[1]) + ":" + str(output_device))
+
+
+ gui = GUI()
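
Note: the real-time loop splices consecutive blocks with SOLA, searching a small window for the best-aligned offset via normalized cross-correlation and then crossfading. A stripped-down sketch of that offset search on toy tensors, mirroring the cor_nom/cor_den computation above (illustrative sizes only, not part of this commit):

    import torch
    import torch.nn.functional as F

    crossfade, search = 8, 4
    infer_wav = torch.randn(64)           # stand-in for one inferred block
    sola_buffer = torch.randn(crossfade)  # faded tail of the previous block

    # normalized cross-correlation over the first crossfade+search samples
    head = infer_wav[None, None, : crossfade + search]
    cor_nom = F.conv1d(head, sola_buffer[None, None, :])
    cor_den = torch.sqrt(F.conv1d(head**2, torch.ones(1, 1, crossfade)) + 1e-8)
    sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0])
    print(int(sola_offset))  # offset in [0, search] where the blocks line up best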
host.json ADDED
@@ -0,0 +1,15 @@
+ {
+     "version": "2.0",
+     "logging": {
+         "applicationInsights": {
+             "samplingSettings": {
+                 "isEnabled": true,
+                 "excludedTypes": "Request"
+             }
+         }
+     },
+     "extensionBundle": {
+         "id": "Microsoft.Azure.Functions.ExtensionBundle",
+         "version": "[4.*, 5.0.0)"
+     }
+ }
hubert_base.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f54b40fd2802423a5643779c4861af1e9ee9c1564dc9d32f54f20b5ffba7db96
+ size 189507909
i18n.py ADDED
@@ -0,0 +1,28 @@
+ import locale
+ import json
+ import os
+
+
+ def load_language_list(language):
+     with open(f"./i18n/{language}.json", "r", encoding="utf-8") as f:
+         language_list = json.load(f)
+     return language_list
+
+
+ class I18nAuto:
+     def __init__(self, language=None):
+         if language in ["Auto", None]:
+             language = locale.getdefaultlocale()[
+                 0
+             ]  # getlocale can't identify the system's language ((None, None))
+         if not os.path.exists(f"./i18n/{language}.json"):
+             language = "en_US"
+         self.language = language
+         # print("Use Language:", language)
+         self.language_map = load_language_list(language)
+
+     def __call__(self, key):
+         return self.language_map.get(key, key)
+
+     def print(self):
+         print("Use Language:", self.language)
infer-web.py ADDED
@@ -0,0 +1,1999 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import sys
4
+ # os.environ["CUDA_VISIBLE_DEVICES"] = ""
5
+ now_dir = os.getcwd()
6
+ sys.path.append(now_dir)
7
+ import traceback, pdb
8
+ import warnings
9
+
10
+ import numpy as np
11
+ import torch
12
+
13
+ os.environ['OPENBLAS_NUM_THREADS'] = '1'
14
+ os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"
15
+ import logging
16
+ import threading
17
+ from random import shuffle
18
+ from subprocess import Popen
19
+ from time import sleep
20
+
21
+ import faiss
22
+ import ffmpeg
23
+ import gradio as gr
24
+ import soundfile as sf
25
+ from config import Config
26
+ from fairseq import checkpoint_utils
27
+ from i18n import I18nAuto
28
+ from infer_pack.models import (
29
+ SynthesizerTrnMs256NSFsid,
30
+ SynthesizerTrnMs256NSFsid_nono,
31
+ SynthesizerTrnMs768NSFsid,
32
+ SynthesizerTrnMs768NSFsid_nono,
33
+ )
34
+ from infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
35
+ from infer_uvr5 import _audio_pre_, _audio_pre_new
36
+ from MDXNet import MDXNetDereverb
37
+ from my_utils import load_audio
38
+ from train.process_ckpt import change_info, extract_small_model, merge, show_info
39
+ from vc_infer_pipeline import VC
40
+ from sklearn.cluster import MiniBatchKMeans
41
+
42
+ logging.getLogger("numba").setLevel(logging.WARNING)
43
+
44
+
45
+ tmp = os.path.join(now_dir, "TEMP")
46
+ shutil.rmtree(tmp, ignore_errors=True)
47
+ shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
48
+ shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True)
49
+ os.makedirs(tmp, exist_ok=True)
50
+ os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
51
+ os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True)
52
+ os.environ["TEMP"] = tmp
53
+ warnings.filterwarnings("ignore")
54
+ torch.manual_seed(114514)
55
+
56
+
57
+ config = Config()
58
+ i18n = I18nAuto()
59
+ i18n.print()
60
+ # 判断是否有能用来训练和加速推理的N卡
61
+ ngpu = torch.cuda.device_count()
62
+ gpu_infos = []
63
+ mem = []
64
+ if_gpu_ok = False
65
+
66
+ if torch.cuda.is_available() or ngpu != 0:
67
+ for i in range(ngpu):
68
+ gpu_name = torch.cuda.get_device_name(i)
69
+ if any(
70
+ value in gpu_name.upper()
71
+ for value in [
72
+ "10",
73
+ "16",
74
+ "20",
75
+ "30",
76
+ "40",
77
+ "A2",
78
+ "A3",
79
+ "A4",
80
+ "P4",
81
+ "A50",
82
+ "500",
83
+ "A60",
84
+ "70",
85
+ "80",
86
+ "90",
87
+ "M4",
88
+ "T4",
89
+ "TITAN",
90
+ ]
91
+ ):
92
+ # A10#A100#V100#A40#P40#M40#K80#A4500
93
+ if_gpu_ok = True # 至少有一张能用的N卡
94
+ gpu_infos.append("%s\t%s" % (i, gpu_name))
95
+ mem.append(
96
+ int(
97
+ torch.cuda.get_device_properties(i).total_memory
98
+ / 1024
99
+ / 1024
100
+ / 1024
101
+ + 0.4
102
+ )
103
+ )
104
+ if if_gpu_ok and len(gpu_infos) > 0:
105
+ gpu_info = "\n".join(gpu_infos)
106
+ default_batch_size = 1
107
+ else:
108
+ gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练")
109
+ default_batch_size = 1
110
+ gpus = "-".join([i[0] for i in gpu_infos])
111
+
112
+
113
+ class ToolButton(gr.Button, gr.components.FormComponent):
114
+ """Small button with single emoji as text, fits inside gradio forms"""
115
+
116
+ def __init__(self, **kwargs):
117
+ super().__init__(variant="tool", **kwargs)
118
+
119
+ def get_block_name(self):
120
+ return "button"
121
+
122
+
123
+ hubert_model = None
124
+
125
+
126
+ def load_hubert():
127
+ global hubert_model
128
+ models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
129
+ ["hubert_base.pt"],
130
+ suffix="",
131
+ )
132
+ hubert_model = models[0]
133
+ hubert_model = hubert_model.to(config.device)
134
+ if config.is_half:
135
+ hubert_model = hubert_model.half()
136
+ else:
137
+ hubert_model = hubert_model.float()
138
+ hubert_model.eval()
139
+
140
+
141
+ weight_root = "weights"
142
+ weight_uvr5_root = "uvr5_weights"
143
+ index_root = "logs"
144
+ names = []
145
+ for name in os.listdir(weight_root):
146
+ if name.endswith(".pth"):
147
+ names.append(name)
148
+ index_paths = []
149
+ for root, dirs, files in os.walk(index_root, topdown=False):
150
+ for name in files:
151
+ if name.endswith(".index") and "trained" not in name:
152
+ index_paths.append("%s/%s" % (root, name))
153
+ uvr5_names = []
154
+ for name in os.listdir(weight_uvr5_root):
155
+ if name.endswith(".pth") or "onnx" in name:
156
+ uvr5_names.append(name.replace(".pth", ""))
157
+
158
+
159
+ def vc_single(
160
+ sid,
161
+ input_audio_path,
162
+ f0_up_key,
163
+ f0_file,
164
+ f0_method,
165
+ file_index,
166
+ file_index2,
167
+ # file_big_npy,
168
+ index_rate,
169
+ filter_radius,
170
+ resample_sr,
171
+ rms_mix_rate,
172
+ protect,
173
+ ): # spk_item, input_audio0, vc_transform0,f0_file,f0method0
174
+ global tgt_sr, net_g, vc, hubert_model, version
175
+ if input_audio_path is None:
176
+ return "You need to upload an audio", None
177
+ f0_up_key = int(f0_up_key)
178
+ try:
179
+ audio = load_audio(input_audio_path, 16000)
180
+ audio_max = np.abs(audio).max() / 0.95
181
+ if audio_max > 1:
182
+ audio /= audio_max
183
+ times = [0, 0, 0]
184
+ if not hubert_model:
185
+ load_hubert()
186
+ if_f0 = cpt.get("f0", 1)
187
+ file_index = (
188
+ (
189
+ file_index.strip(" ")
190
+ .strip('"')
191
+ .strip("\n")
192
+ .strip('"')
193
+ .strip(" ")
194
+ .replace("trained", "added")
195
+ )
196
+ if file_index != ""
197
+ else file_index2
198
+ ) # 防止小白写错,自动帮他替换掉
199
+ # file_big_npy = (
200
+ # file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
201
+ # )
202
+ audio_opt = vc.pipeline(
203
+ hubert_model,
204
+ net_g,
205
+ sid,
206
+ audio,
207
+ input_audio_path,
208
+ times,
209
+ f0_up_key,
210
+ f0_method,
211
+ file_index,
212
+ # file_big_npy,
213
+ index_rate,
214
+ if_f0,
215
+ filter_radius,
216
+ tgt_sr,
217
+ resample_sr,
218
+ rms_mix_rate,
219
+ version,
220
+ protect,
221
+ f0_file=f0_file,
222
+ )
223
+ if tgt_sr != resample_sr >= 16000:
224
+ tgt_sr = resample_sr
225
+ index_info = (
226
+ "Using index:%s." % file_index
227
+ if os.path.exists(file_index)
228
+ else "Index not used."
229
+ )
230
+ return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
231
+ index_info,
232
+ times[0],
233
+ times[1],
234
+ times[2],
235
+ ), (tgt_sr, audio_opt)
236
+ except:
237
+ info = traceback.format_exc()
238
+ print(info)
239
+ return info, (None, None)
240
+
241
+
242
+ def vc_multi(
243
+ sid,
244
+ dir_path,
245
+ opt_root,
246
+ paths,
247
+ f0_up_key,
248
+ f0_method,
249
+ file_index,
250
+ file_index2,
251
+ # file_big_npy,
252
+ index_rate,
253
+ filter_radius,
254
+ resample_sr,
255
+ rms_mix_rate,
256
+ protect,
257
+ format1,
258
+ ):
259
+ try:
260
+ dir_path = (
261
+ dir_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
262
+ ) # 防止小白拷路径头尾带了空格和"和回车
263
+ opt_root = opt_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
264
+ os.makedirs(opt_root, exist_ok=True)
265
+ try:
266
+ if dir_path != "":
267
+ paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)]
268
+ else:
269
+ paths = [path.name for path in paths]
270
+ except:
271
+ traceback.print_exc()
272
+ paths = [path.name for path in paths]
273
+ infos = []
274
+ for path in paths:
275
+ info, opt = vc_single(
276
+ sid,
277
+ path,
278
+ f0_up_key,
279
+ None,
280
+ f0_method,
281
+ file_index,
282
+ file_index2,
283
+ # file_big_npy,
284
+ index_rate,
285
+ filter_radius,
286
+ resample_sr,
287
+ rms_mix_rate,
288
+ protect,
289
+ )
290
+ if "Success" in info:
291
+ try:
292
+ tgt_sr, audio_opt = opt
293
+ if format1 in ["wav", "flac"]:
294
+ sf.write(
295
+ "%s/%s.%s" % (opt_root, os.path.basename(path), format1),
296
+ audio_opt,
297
+ tgt_sr,
298
+ )
299
+ else:
300
+ path = "%s/%s.wav" % (opt_root, os.path.basename(path))
301
+ sf.write(
302
+ path,
303
+ audio_opt,
304
+ tgt_sr,
305
+ )
306
+ if os.path.exists(path):
307
+ os.system(
308
+ "ffmpeg -i %s -vn %s -q:a 2 -y"
309
+ % (path, path[:-4] + ".%s" % format1)
310
+ )
311
+ except:
312
+ info += traceback.format_exc()
313
+ infos.append("%s->%s" % (os.path.basename(path), info))
314
+ yield "\n".join(infos)
315
+ yield "\n".join(infos)
316
+ except:
317
+ yield traceback.format_exc()
318
+
319
+
320
+ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0):
321
+ infos = []
322
+ try:
323
+ inp_root = inp_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
324
+ save_root_vocal = (
325
+ save_root_vocal.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
326
+ )
327
+ save_root_ins = (
328
+ save_root_ins.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
329
+ )
330
+ if model_name == "onnx_dereverb_By_FoxJoy":
331
+ pre_fun = MDXNetDereverb(15)
332
+ else:
333
+ func = _audio_pre_ if "DeEcho" not in model_name else _audio_pre_new
334
+ pre_fun = func(
335
+ agg=int(agg),
336
+ model_path=os.path.join(weight_uvr5_root, model_name + ".pth"),
337
+ device=config.device,
338
+ is_half=config.is_half,
339
+ )
340
+ if inp_root != "":
341
+ paths = [os.path.join(inp_root, name) for name in os.listdir(inp_root)]
342
+ else:
343
+ paths = [path.name for path in paths]
344
+ for path in paths:
345
+ inp_path = os.path.join(inp_root, path)
346
+ need_reformat = 1
347
+ done = 0
348
+ try:
349
+ info = ffmpeg.probe(inp_path, cmd="ffprobe")
350
+ if (
351
+ info["streams"][0]["channels"] == 2
352
+ and info["streams"][0]["sample_rate"] == "44100"
353
+ ):
354
+ need_reformat = 0
355
+ pre_fun._path_audio_(
356
+ inp_path, save_root_ins, save_root_vocal, format0
357
+ )
358
+ done = 1
359
+ except:
360
+ need_reformat = 1
361
+ traceback.print_exc()
362
+ if need_reformat == 1:
363
+ tmp_path = "%s/%s.reformatted.wav" % (tmp, os.path.basename(inp_path))
364
+ os.system(
365
+ "ffmpeg -i %s -vn -acodec pcm_s16le -ac 2 -ar 44100 %s -y"
366
+ % (inp_path, tmp_path)
367
+ )
368
+ inp_path = tmp_path
369
+ try:
370
+ if done == 0:
371
+ pre_fun._path_audio_(
372
+ inp_path, save_root_ins, save_root_vocal, format0
373
+ )
374
+ infos.append("%s->Success" % (os.path.basename(inp_path)))
375
+ yield "\n".join(infos)
376
+ except:
377
+ infos.append(
378
+ "%s->%s" % (os.path.basename(inp_path), traceback.format_exc())
379
+ )
380
+ yield "\n".join(infos)
381
+ except:
382
+ infos.append(traceback.format_exc())
383
+ yield "\n".join(infos)
384
+ finally:
385
+ try:
386
+ if model_name == "onnx_dereverb_By_FoxJoy":
387
+ del pre_fun.pred.model
388
+ del pre_fun.pred.model_
389
+ else:
390
+ del pre_fun.model
391
+ del pre_fun
392
+ except:
393
+ traceback.print_exc()
394
+ print("clean_empty_cache")
395
+ if torch.cuda.is_available():
396
+ torch.cuda.empty_cache()
397
+ yield "\n".join(infos)
398
+
399
+
400
+ # only one voice model may be active per tab at a time
401
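+ # Loads the selected .pth voice model, rebuilds the matching synthesizer (v1/v2, f0 or not),
+ # and returns UI updates; an empty sid unloads everything to free VRAM.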
+ def get_vc(sid, to_return_protect0, to_return_protect1):
402
+ global n_spk, tgt_sr, net_g, vc, cpt, version
403
+ if sid == "" or sid == []:
404
+ global hubert_model
405
+ if hubert_model is not None: # polling may re-enter here; detect sid switching from a loaded model to none
406
+ print("clean_empty_cache")
407
+ del net_g, n_spk, vc, hubert_model, tgt_sr # cpt is deliberately kept
408
+ hubert_model = net_g = n_spk = vc = tgt_sr = None
409
+ if torch.cuda.is_available():
410
+ torch.cuda.empty_cache()
411
+ ### the shuffle below is required, otherwise GPU memory is not fully released
412
+ if_f0 = cpt.get("f0", 1)
413
+ version = cpt.get("version", "v1")
414
+ if version == "v1":
415
+ if if_f0 == 1:
416
+ net_g = SynthesizerTrnMs256NSFsid(
417
+ *cpt["config"], is_half=config.is_half
418
+ )
419
+ else:
420
+ net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
421
+ elif version == "v2":
422
+ if if_f0 == 1:
423
+ net_g = SynthesizerTrnMs768NSFsid(
424
+ *cpt["config"], is_half=config.is_half
425
+ )
426
+ else:
427
+ net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
428
+ del net_g, cpt
429
+ if torch.cuda.is_available():
430
+ torch.cuda.empty_cache()
431
+ cpt = None
432
+ return {"visible": False, "__type__": "update"}
433
+ person = "%s/%s" % (weight_root, sid)
434
+ print("loading %s" % person)
435
+ cpt = torch.load(person, map_location="cpu")
436
+ tgt_sr = cpt["config"][-1]
437
+ cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
438
+ if_f0 = cpt.get("f0", 1)
439
+ if if_f0 == 0:
440
+ to_return_protect0 = to_return_protect1 = {
441
+ "visible": False,
442
+ "value": 0.5,
443
+ "__type__": "update",
444
+ }
445
+ else:
446
+ to_return_protect0 = {
447
+ "visible": True,
448
+ "value": to_return_protect0,
449
+ "__type__": "update",
450
+ }
451
+ to_return_protect1 = {
452
+ "visible": True,
453
+ "value": to_return_protect1,
454
+ "__type__": "update",
455
+ }
456
+ version = cpt.get("version", "v1")
457
+ if version == "v1":
458
+ if if_f0 == 1:
459
+ net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
460
+ else:
461
+ net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
462
+ elif version == "v2":
463
+ if if_f0 == 1:
464
+ net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
465
+ else:
466
+ net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
467
+ del net_g.enc_q
468
+ print(net_g.load_state_dict(cpt["weight"], strict=False))
469
+ net_g.eval().to(config.device)
470
+ if config.is_half:
471
+ net_g = net_g.half()
472
+ else:
473
+ net_g = net_g.float()
474
+ vc = VC(tgt_sr, config)
475
+ n_spk = cpt["config"][-3]
476
+ return (
477
+ {"visible": True, "maximum": n_spk, "__type__": "update"},
478
+ to_return_protect0,
479
+ to_return_protect1,
480
+ )
481
+
482
+
483
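+ # Rescans weight_root for .pth models and index_root for usable .index files to refresh the dropdowns.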
+ def change_choices():
484
+ names = []
485
+ for name in os.listdir(weight_root):
486
+ if name.endswith(".pth"):
487
+ names.append(name)
488
+ index_paths = []
489
+ for root, dirs, files in os.walk(index_root, topdown=False):
490
+ for name in files:
491
+ if name.endswith(".index") and "trained" not in name:
492
+ index_paths.append("%s/%s" % (root, name))
493
+ return {"choices": sorted(names), "__type__": "update"}, {
494
+ "choices": sorted(index_paths),
495
+ "__type__": "update",
496
+ }
497
+
498
+
499
+ def clean():
500
+ return {"value": "", "__type__": "update"}
501
+
502
+
503
+ sr_dict = {
504
+ "32k": 32000,
505
+ "40k": 40000,
506
+ "48k": 48000,
507
+ }
508
+
509
+
510
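+ # Watcher helper: blocks until subprocess p exits, then flips the shared done flag.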
+ def if_done(done, p):
511
+ while 1:
512
+ if p.poll() is None:
513
+ sleep(0.5)
514
+ else:
515
+ break
516
+ done[0] = True
517
+
518
+
519
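+ # Like if_done, but waits until every process in ps has exited before setting the flag.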
+ def if_done_multi(done, ps):
520
+ while 1:
521
+ # poll() == None means the process has not finished
522
+ # keep looping while any process is still running
523
+ flag = 1
524
+ for p in ps:
525
+ if p.poll() is None:
526
+ flag = 0
527
+ sleep(0.5)
528
+ break
529
+ if flag == 1:
530
+ break
531
+ done[0] = True
532
+
533
+
534
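+ # Runs the preprocessing script as a subprocess and streams its log file back to the UI,
+ # polling once per second until the watcher thread reports completion.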
+ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
535
+ sr = sr_dict[sr]
536
+ os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
537
+ f = open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "w")
538
+ f.close()
539
+ cmd = (
540
+ config.python_cmd
541
+ + " trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s "
542
+ % (trainset_dir, sr, n_p, now_dir, exp_dir)
543
+ + str(config.noparallel)
544
+ )
545
+ print(cmd)
546
+ p = Popen(cmd, shell=True) # , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir
547
+ ### Gradio only surfaces Popen output once the process exits, so we poll a log file on a timer instead
548
+ done = [False]
549
+ threading.Thread(
550
+ target=if_done,
551
+ args=(
552
+ done,
553
+ p,
554
+ ),
555
+ ).start()
556
+ while 1:
557
+ with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
558
+ yield (f.read())
559
+ sleep(1)
560
+ if done[0]:
561
+ break
562
+ with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
563
+ log = f.read()
564
+ print(log)
565
+ yield log
566
+
567
+
568
+ # but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2])
569
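+ # Extracts pitch (optionally, on CPU) and then features on each listed GPU in parallel
+ # subprocesses, streaming the shared log file back to the UI.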
+ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19):
570
+ gpus = gpus.split("-")
571
+ os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
572
+ f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w")
573
+ f.close()
574
+ if if_f0:
575
+ cmd = config.python_cmd + " extract_f0_print.py %s/logs/%s %s %s" % (
576
+ now_dir,
577
+ exp_dir,
578
+ n_p,
579
+ f0method,
580
+ )
581
+ print(cmd)
582
+ p = Popen(cmd, shell=True, cwd=now_dir) # , stdin=PIPE, stdout=PIPE,stderr=PIPE
583
+ ### Gradio only surfaces Popen output once the process exits, so we poll a log file on a timer instead
584
+ done = [False]
585
+ threading.Thread(
586
+ target=if_done,
587
+ args=(
588
+ done,
589
+ p,
590
+ ),
591
+ ).start()
592
+ while 1:
593
+ with open(
594
+ "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r"
595
+ ) as f:
596
+ yield (f.read())
597
+ sleep(1)
598
+ if done[0]:
599
+ break
600
+ with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
601
+ log = f.read()
602
+ print(log)
603
+ yield log
604
+ #### spawn a separate subprocess for each part (one per GPU)
605
+ """
606
+ n_part=int(sys.argv[1])
607
+ i_part=int(sys.argv[2])
608
+ i_gpu=sys.argv[3]
609
+ exp_dir=sys.argv[4]
610
+ os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
611
+ """
612
+ leng = len(gpus)
613
+ ps = []
614
+ for idx, n_g in enumerate(gpus):
615
+ cmd = (
616
+ config.python_cmd
617
+ + " extract_feature_print.py %s %s %s %s %s/logs/%s %s"
618
+ % (
619
+ config.device,
620
+ leng,
621
+ idx,
622
+ n_g,
623
+ now_dir,
624
+ exp_dir,
625
+ version19,
626
+ )
627
+ )
628
+ print(cmd)
629
+ p = Popen(
630
+ cmd, shell=True, cwd=now_dir
631
+ ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
632
+ ps.append(p)
633
+ ### Gradio only surfaces Popen output once the process exits, so we poll a log file on a timer instead
634
+ done = [False]
635
+ threading.Thread(
636
+ target=if_done_multi,
637
+ args=(
638
+ done,
639
+ ps,
640
+ ),
641
+ ).start()
642
+ while 1:
643
+ with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
644
+ yield (f.read())
645
+ sleep(1)
646
+ if done[0]:
647
+ break
648
+ with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
649
+ log = f.read()
650
+ print(log)
651
+ yield log
652
+
653
+
654
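+ # Maps the chosen sample rate / f0 setting / version to pretrained G and D checkpoint paths,
+ # falling back to empty strings when the files are missing.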
+ def change_sr2(sr2, if_f0_3, version19):
655
+ path_str = "" if version19 == "v1" else "_v2"
656
+ f0_str = "f0" if if_f0_3 else ""
657
+ if_pretrained_generator_exist = os.access(
658
+ "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK
659
+ )
660
+ if_pretrained_discriminator_exist = os.access(
661
+ "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK
662
+ )
663
+ if not if_pretrained_generator_exist:
664
+ print(
665
+ "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2),
666
+ "not exist, will not use pretrained model",
667
+ )
668
+ if not if_pretrained_discriminator_exist:
669
+ print(
670
+ "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2),
671
+ "not exist, will not use pretrained model",
672
+ )
673
+ return (
674
+ "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
675
+ if if_pretrained_generator_exist
676
+ else "",
677
+ "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
678
+ if if_pretrained_discriminator_exist
679
+ else "",
680
+ )
681
+
682
+
683
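+ # v1 does not offer 32k here, so switching versions also updates the sample-rate choices
+ # before resolving the pretrained checkpoint paths.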
+ def change_version19(sr2, if_f0_3, version19):
684
+ path_str = "" if version19 == "v1" else "_v2"
685
+ if sr2 == "32k" and version19 == "v1":
686
+ sr2 = "40k"
687
+ to_return_sr2 = (
688
+ {"choices": ["40k", "48k"], "__type__": "update", "value": sr2}
689
+ if version19 == "v1"
690
+ else {"choices": ["40k", "48k", "32k"], "__type__": "update", "value": sr2}
691
+ )
692
+ f0_str = "f0" if if_f0_3 else ""
693
+ if_pretrained_generator_exist = os.access(
694
+ "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK
695
+ )
696
+ if_pretrained_discriminator_exist = os.access(
697
+ "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK
698
+ )
699
+ if not if_pretrained_generator_exist:
700
+ print(
701
+ "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2),
702
+ "not exist, will not use pretrained model",
703
+ )
704
+ if not if_pretrained_discriminator_exist:
705
+ print(
706
+ "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2),
707
+ "not exist, will not use pretrained model",
708
+ )
709
+ return (
710
+ "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
711
+ if if_pretrained_generator_exist
712
+ else "",
713
+ "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
714
+ if if_pretrained_discriminator_exist
715
+ else "",
716
+ to_return_sr2,
717
+ )
718
+
719
+
720
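+ # Toggles the pitch-related controls and swaps between f0 and non-f0 pretrained checkpoints.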
+ def change_f0(if_f0_3, sr2, version19): # f0method8,pretrained_G14,pretrained_D15
721
+ path_str = "" if version19 == "v1" else "_v2"
722
+ if_pretrained_generator_exist = os.access(
723
+ "pretrained%s/f0G%s.pth" % (path_str, sr2), os.F_OK
724
+ )
725
+ if_pretrained_discriminator_exist = os.access(
726
+ "pretrained%s/f0D%s.pth" % (path_str, sr2), os.F_OK
727
+ )
728
+ if not if_pretrained_generator_exist:
729
+ print(
730
+ "pretrained%s/f0G%s.pth" % (path_str, sr2),
731
+ "not exist, will not use pretrained model",
732
+ )
733
+ if not if_pretrained_discriminator_exist:
734
+ print(
735
+ "pretrained%s/f0D%s.pth" % (path_str, sr2),
736
+ "not exist, will not use pretrained model",
737
+ )
738
+ if if_f0_3:
739
+ return (
740
+ {"visible": True, "__type__": "update"},
741
+ "pretrained%s/f0G%s.pth" % (path_str, sr2)
742
+ if if_pretrained_generator_exist
743
+ else "",
744
+ "pretrained%s/f0D%s.pth" % (path_str, sr2)
745
+ if if_pretrained_discriminator_exist
746
+ else "",
747
+ )
748
+ return (
749
+ {"visible": False, "__type__": "update"},
750
+ ("pretrained%s/G%s.pth" % (path_str, sr2))
751
+ if if_pretrained_generator_exist
752
+ else "",
753
+ ("pretrained%s/D%s.pth" % (path_str, sr2))
754
+ if if_pretrained_discriminator_exist
755
+ else "",
756
+ )
757
+
758
+
759
+ # but3.click(click_train,[exp_dir1,sr2,if_f0_3,save_epoch10,total_epoch11,batch_size12,if_save_latest13,pretrained_G14,pretrained_D15,gpus16])
760
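+ # Writes the training filelist (ground-truth wavs, features, and optional f0 data, plus two
+ # mute entries), builds the training command, and blocks until the subprocess finishes.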
+ def click_train(
761
+ exp_dir1,
762
+ sr2,
763
+ if_f0_3,
764
+ spk_id5,
765
+ save_epoch10,
766
+ total_epoch11,
767
+ batch_size12,
768
+ if_save_latest13,
769
+ pretrained_G14,
770
+ pretrained_D15,
771
+ gpus16,
772
+ if_cache_gpu17,
773
+ if_save_every_weights18,
774
+ version19,
775
+ ):
776
+ # build the training filelist
777
+ exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
778
+ os.makedirs(exp_dir, exist_ok=True)
779
+ gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir)
780
+ feature_dir = (
781
+ "%s/3_feature256" % (exp_dir)
782
+ if version19 == "v1"
783
+ else "%s/3_feature768" % (exp_dir)
784
+ )
785
+ if if_f0_3:
786
+ f0_dir = "%s/2a_f0" % (exp_dir)
787
+ f0nsf_dir = "%s/2b-f0nsf" % (exp_dir)
788
+ names = (
789
+ set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
790
+ & set([name.split(".")[0] for name in os.listdir(feature_dir)])
791
+ & set([name.split(".")[0] for name in os.listdir(f0_dir)])
792
+ & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
793
+ )
794
+ else:
795
+ names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
796
+ [name.split(".")[0] for name in os.listdir(feature_dir)]
797
+ )
798
+ opt = []
799
+ for name in names:
800
+ if if_f0_3:
801
+ opt.append(
802
+ "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
803
+ % (
804
+ gt_wavs_dir.replace("\\", "\\\\"),
805
+ name,
806
+ feature_dir.replace("\\", "\\\\"),
807
+ name,
808
+ f0_dir.replace("\\", "\\\\"),
809
+ name,
810
+ f0nsf_dir.replace("\\", "\\\\"),
811
+ name,
812
+ spk_id5,
813
+ )
814
+ )
815
+ else:
816
+ opt.append(
817
+ "%s/%s.wav|%s/%s.npy|%s"
818
+ % (
819
+ gt_wavs_dir.replace("\\", "\\\\"),
820
+ name,
821
+ feature_dir.replace("\\", "\\\\"),
822
+ name,
823
+ spk_id5,
824
+ )
825
+ )
826
+ fea_dim = 256 if version19 == "v1" else 768
827
+ if if_f0_3:
828
+ for _ in range(2):
829
+ opt.append(
830
+ "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
831
+ % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
832
+ )
833
+ else:
834
+ for _ in range(2):
835
+ opt.append(
836
+ "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
837
+ % (now_dir, sr2, now_dir, fea_dim, spk_id5)
838
+ )
839
+ shuffle(opt)
840
+ with open("%s/filelist.txt" % exp_dir, "w") as f:
841
+ f.write("\n".join(opt))
842
+ print("write filelist done")
843
+ # config generation: no longer needed, handled inside the training script
844
+ # cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0"
845
+ print("use gpus:", gpus16)
846
+ if pretrained_G14 == "":
847
+ print("no pretrained Generator")
848
+ if pretrained_D15 == "":
849
+ print("no pretrained Discriminator")
850
+ if gpus16:
851
+ cmd = (
852
+ config.python_cmd
853
+ + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
854
+ % (
855
+ exp_dir1,
856
+ sr2,
857
+ 1 if if_f0_3 else 0,
858
+ batch_size12,
859
+ gpus16,
860
+ total_epoch11,
861
+ save_epoch10,
862
+ "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
863
+ "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
864
+ 1 if if_save_latest13 == i18n("是") else 0,
865
+ 1 if if_cache_gpu17 == i18n("是") else 0,
866
+ 1 if if_save_every_weights18 == i18n("是") else 0,
867
+ version19,
868
+ )
869
+ )
870
+ else:
871
+ cmd = (
872
+ config.python_cmd
873
+ + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
874
+ % (
875
+ exp_dir1,
876
+ sr2,
877
+ 1 if if_f0_3 else 0,
878
+ batch_size12,
879
+ total_epoch11,
880
+ save_epoch10,
881
+ "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "\b",
882
+ "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "\b",
883
+ 1 if if_save_latest13 == i18n("是") else 0,
884
+ 1 if if_cache_gpu17 == i18n("是") else 0,
885
+ 1 if if_save_every_weights18 == i18n("是") else 0,
886
+ version19,
887
+ )
888
+ )
889
+ print(cmd)
890
+ p = Popen(cmd, shell=True, cwd=now_dir)
891
+ p.wait()
892
+ return "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"
893
+
894
+
895
+ # but4.click(train_index, [exp_dir1], info3)
896
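+ # Builds a FAISS IVF index over the extracted features; very large feature sets (>2e5 rows)
+ # are first reduced to 10k MiniBatchKMeans centers, then the index is trained and filled in batches.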
+ def train_index(exp_dir1, version19):
897
+ exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
898
+ os.makedirs(exp_dir, exist_ok=True)
899
+ feature_dir = (
900
+ "%s/3_feature256" % (exp_dir)
901
+ if version19 == "v1"
902
+ else "%s/3_feature768" % (exp_dir)
903
+ )
904
+ if not os.path.exists(feature_dir):
905
+ return "请先进行特征提取!"
906
+ listdir_res = list(os.listdir(feature_dir))
907
+ if len(listdir_res) == 0:
908
+ return "请先进行特征提取!"
909
+ infos = []
910
+ npys = []
911
+ for name in sorted(listdir_res):
912
+ phone = np.load("%s/%s" % (feature_dir, name))
913
+ npys.append(phone)
914
+ big_npy = np.concatenate(npys, 0)
915
+ big_npy_idx = np.arange(big_npy.shape[0])
916
+ np.random.shuffle(big_npy_idx)
917
+ big_npy = big_npy[big_npy_idx]
918
+ if big_npy.shape[0] > 2e5:
919
+ # if(1):
920
+ infos.append("Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0])
921
+ yield "\n".join(infos)
922
+ try:
923
+ big_npy = (
924
+ MiniBatchKMeans(
925
+ n_clusters=10000,
926
+ verbose=True,
927
+ batch_size=256 * config.n_cpu,
928
+ compute_labels=False,
929
+ init="random",
930
+ )
931
+ .fit(big_npy)
932
+ .cluster_centers_
933
+ )
934
+ except:
935
+ info = traceback.format_exc()
936
+ print(info)
937
+ infos.append(info)
938
+ yield "\n".join(infos)
939
+
940
+ np.save("%s/total_fea.npy" % exp_dir, big_npy)
941
+ n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
942
+ infos.append("%s,%s" % (big_npy.shape, n_ivf))
943
+ yield "\n".join(infos)
944
+ index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
945
+ # index = faiss.index_factory(256if version19=="v1"else 768, "IVF%s,PQ128x4fs,RFlat"%n_ivf)
946
+ infos.append("training")
947
+ yield "\n".join(infos)
948
+ index_ivf = faiss.extract_index_ivf(index) #
949
+ index_ivf.nprobe = 1
950
+ index.train(big_npy)
951
+ faiss.write_index(
952
+ index,
953
+ "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
954
+ % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
955
+ )
956
+ # faiss.write_index(index, '%s/trained_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19))
957
+ infos.append("adding")
958
+ yield "\n".join(infos)
959
+ batch_size_add = 8192
960
+ for i in range(0, big_npy.shape[0], batch_size_add):
961
+ index.add(big_npy[i : i + batch_size_add])
962
+ faiss.write_index(
963
+ index,
964
+ "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
965
+ % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
966
+ )
967
+ infos.append(
968
+ "成功构建索引,added_IVF%s_Flat_nprobe_%s_%s_%s.index"
969
+ % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
970
+ )
971
+ # faiss.write_index(index, '%s/added_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19))
972
+ # infos.append("成功构建索引,added_IVF%s_Flat_FastScan_%s.index"%(n_ivf,version19))
973
+ yield "\n".join(infos)
974
+
975
+
976
+ # but5.click(train1key, [exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0method8, save_epoch10, total_epoch11, batch_size12, if_save_latest13, pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17], info3)
977
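+ # One-click pipeline: preprocess, extract pitch/features, train the model, then build the
+ # index, yielding accumulated status text at each step.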
+ def train1key(
978
+ exp_dir1,
979
+ sr2,
980
+ if_f0_3,
981
+ trainset_dir4,
982
+ spk_id5,
983
+ np7,
984
+ f0method8,
985
+ save_epoch10,
986
+ total_epoch11,
987
+ batch_size12,
988
+ if_save_latest13,
989
+ pretrained_G14,
990
+ pretrained_D15,
991
+ gpus16,
992
+ if_cache_gpu17,
993
+ if_save_every_weights18,
994
+ version19,
995
+ ):
996
+ infos = []
997
+
998
+ def get_info_str(strr):
999
+ infos.append(strr)
1000
+ return "\n".join(infos)
1001
+
1002
+ model_log_dir = "%s/logs/%s" % (now_dir, exp_dir1)
1003
+ preprocess_log_path = "%s/preprocess.log" % model_log_dir
1004
+ extract_f0_feature_log_path = "%s/extract_f0_feature.log" % model_log_dir
1005
+ gt_wavs_dir = "%s/0_gt_wavs" % model_log_dir
1006
+ feature_dir = (
1007
+ "%s/3_feature256" % model_log_dir
1008
+ if version19 == "v1"
1009
+ else "%s/3_feature768" % model_log_dir
1010
+ )
1011
+
1012
+ os.makedirs(model_log_dir, exist_ok=True)
1013
+ ######### step1: preprocess the data
1014
+ open(preprocess_log_path, "w").close()
1015
+ cmd = (
1016
+ config.python_cmd
1017
+ + " trainset_preprocess_pipeline_print.py %s %s %s %s "
1018
+ % (trainset_dir4, sr_dict[sr2], np7, model_log_dir)
1019
+ + str(config.noparallel)
1020
+ )
1021
+ yield get_info_str(i18n("step1:正在处理数据"))
1022
+ yield get_info_str(cmd)
1023
+ p = Popen(cmd, shell=True)
1024
+ p.wait()
1025
+ with open(preprocess_log_path, "r") as f:
1026
+ print(f.read())
1027
+ ######### step2a: extract pitch
1028
+ open(extract_f0_feature_log_path, "w").close()
1029
+ if if_f0_3:
1030
+ yield get_info_str("step2a:正在提取音高")
1031
+ cmd = config.python_cmd + " extract_f0_print.py %s %s %s" % (
1032
+ model_log_dir,
1033
+ np7,
1034
+ f0method8,
1035
+ )
1036
+ yield get_info_str(cmd)
1037
+ p = Popen(cmd, shell=True, cwd=now_dir)
1038
+ p.wait()
1039
+ with open(extract_f0_feature_log_path, "r") as f:
1040
+ print(f.read())
1041
+ else:
1042
+ yield get_info_str(i18n("step2a:无需提取音高"))
1043
+ ####### step2b: extract features
1044
+ yield get_info_str(i18n("step2b:正在提取特征"))
1045
+ gpus = gpus16.split("-")
1046
+ leng = len(gpus)
1047
+ ps = []
1048
+ for idx, n_g in enumerate(gpus):
1049
+ cmd = config.python_cmd + " extract_feature_print.py %s %s %s %s %s %s" % (
1050
+ config.device,
1051
+ leng,
1052
+ idx,
1053
+ n_g,
1054
+ model_log_dir,
1055
+ version19,
1056
+ )
1057
+ yield get_info_str(cmd)
1058
+ p = Popen(
1059
+ cmd, shell=True, cwd=now_dir
1060
+ ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
1061
+ ps.append(p)
1062
+ for p in ps:
1063
+ p.wait()
1064
+ with open(extract_f0_feature_log_path, "r") as f:
1065
+ print(f.read())
1066
+ #######step3a:训练模型
1067
+ yield get_info_str(i18n("step3a:正在训练模型"))
1068
+ # 生成filelist
1069
+ if if_f0_3:
1070
+ f0_dir = "%s/2a_f0" % model_log_dir
1071
+ f0nsf_dir = "%s/2b-f0nsf" % model_log_dir
1072
+ names = (
1073
+ set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
1074
+ & set([name.split(".")[0] for name in os.listdir(feature_dir)])
1075
+ & set([name.split(".")[0] for name in os.listdir(f0_dir)])
1076
+ & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
1077
+ )
1078
+ else:
1079
+ names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
1080
+ [name.split(".")[0] for name in os.listdir(feature_dir)]
1081
+ )
1082
+ opt = []
1083
+ for name in names:
1084
+ if if_f0_3:
1085
+ opt.append(
1086
+ "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
1087
+ % (
1088
+ gt_wavs_dir.replace("\\", "\\\\"),
1089
+ name,
1090
+ feature_dir.replace("\\", "\\\\"),
1091
+ name,
1092
+ f0_dir.replace("\\", "\\\\"),
1093
+ name,
1094
+ f0nsf_dir.replace("\\", "\\\\"),
1095
+ name,
1096
+ spk_id5,
1097
+ )
1098
+ )
1099
+ else:
1100
+ opt.append(
1101
+ "%s/%s.wav|%s/%s.npy|%s"
1102
+ % (
1103
+ gt_wavs_dir.replace("\\", "\\\\"),
1104
+ name,
1105
+ feature_dir.replace("\\", "\\\\"),
1106
+ name,
1107
+ spk_id5,
1108
+ )
1109
+ )
1110
+ fea_dim = 256 if version19 == "v1" else 768
1111
+ if if_f0_3:
1112
+ for _ in range(2):
1113
+ opt.append(
1114
+ "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
1115
+ % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
1116
+ )
1117
+ else:
1118
+ for _ in range(2):
1119
+ opt.append(
1120
+ "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
1121
+ % (now_dir, sr2, now_dir, fea_dim, spk_id5)
1122
+ )
1123
+ shuffle(opt)
1124
+ with open("%s/filelist.txt" % model_log_dir, "w") as f:
1125
+ f.write("\n".join(opt))
1126
+ yield get_info_str("write filelist done")
1127
+ if gpus16:
1128
+ cmd = (
1129
+ config.python_cmd
1130
+ + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
1131
+ % (
1132
+ exp_dir1,
1133
+ sr2,
1134
+ 1 if if_f0_3 else 0,
1135
+ batch_size12,
1136
+ gpus16,
1137
+ total_epoch11,
1138
+ save_epoch10,
1139
+ "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
1140
+ "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
1141
+ 1 if if_save_latest13 == i18n("是") else 0,
1142
+ 1 if if_cache_gpu17 == i18n("是") else 0,
1143
+ 1 if if_save_every_weights18 == i18n("是") else 0,
1144
+ version19,
1145
+ )
1146
+ )
1147
+ else:
1148
+ cmd = (
1149
+ config.python_cmd
1150
+ + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
1151
+ % (
1152
+ exp_dir1,
1153
+ sr2,
1154
+ 1 if if_f0_3 else 0,
1155
+ batch_size12,
1156
+ total_epoch11,
1157
+ save_epoch10,
1158
+ "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
1159
+ "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
1160
+ 1 if if_save_latest13 == i18n("是") else 0,
1161
+ 1 if if_cache_gpu17 == i18n("是") else 0,
1162
+ 1 if if_save_every_weights18 == i18n("是") else 0,
1163
+ version19,
1164
+ )
1165
+ )
1166
+ yield get_info_str(cmd)
1167
+ p = Popen(cmd, shell=True, cwd=now_dir)
1168
+ p.wait()
1169
+ yield get_info_str(i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"))
1170
+ ####### step3b: train the index
1171
+ npys = []
1172
+ listdir_res = list(os.listdir(feature_dir))
1173
+ for name in sorted(listdir_res):
1174
+ phone = np.load("%s/%s" % (feature_dir, name))
1175
+ npys.append(phone)
1176
+ big_npy = np.concatenate(npys, 0)
1177
+
1178
+ big_npy_idx = np.arange(big_npy.shape[0])
1179
+ np.random.shuffle(big_npy_idx)
1180
+ big_npy = big_npy[big_npy_idx]
1181
+
1182
+ if big_npy.shape[0] > 2e5:
1183
+ # if(1):
1184
+ info = "Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0]
1185
+ print(info)
1186
+ yield get_info_str(info)
1187
+ try:
1188
+ big_npy = (
1189
+ MiniBatchKMeans(
1190
+ n_clusters=10000,
1191
+ verbose=True,
1192
+ batch_size=256 * config.n_cpu,
1193
+ compute_labels=False,
1194
+ init="random",
1195
+ )
1196
+ .fit(big_npy)
1197
+ .cluster_centers_
1198
+ )
1199
+ except:
1200
+ info = traceback.format_exc()
1201
+ print(info)
1202
+ yield get_info_str(info)
1203
+
1204
+ np.save("%s/total_fea.npy" % model_log_dir, big_npy)
1205
+
1206
+ # n_ivf = big_npy.shape[0] // 39
1207
+ n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
1208
+ yield get_info_str("%s,%s" % (big_npy.shape, n_ivf))
1209
+ index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
1210
+ yield get_info_str("training index")
1211
+ index_ivf = faiss.extract_index_ivf(index) #
1212
+ index_ivf.nprobe = 1
1213
+ index.train(big_npy)
1214
+ faiss.write_index(
1215
+ index,
1216
+ "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
1217
+ % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
1218
+ )
1219
+ yield get_info_str("adding index")
1220
+ batch_size_add = 8192
1221
+ for i in range(0, big_npy.shape[0], batch_size_add):
1222
+ index.add(big_npy[i : i + batch_size_add])
1223
+ faiss.write_index(
1224
+ index,
1225
+ "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
1226
+ % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
1227
+ )
1228
+ yield get_info_str(
1229
+ "成功构建索引, added_IVF%s_Flat_nprobe_%s_%s_%s.index"
1230
+ % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
1231
+ )
1232
+ yield get_info_str(i18n("全流程结束!"))
1233
+
1234
+
1235
+ # ckpt_path2.change(change_info_,[ckpt_path2],[sr__,if_f0__])
1236
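+ # Reads sample rate, f0 flag, and version from the train.log next to the checkpoint to prefill the form.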
+ def change_info_(ckpt_path):
1237
+ if not os.path.exists(ckpt_path.replace(os.path.basename(ckpt_path), "train.log")):
1238
+ return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
1239
+ try:
1240
+ with open(
1241
+ ckpt_path.replace(os.path.basename(ckpt_path), "train.log"), "r"
1242
+ ) as f:
1243
+ info = eval(f.read().strip("\n").split("\n")[0].split("\t")[-1])
1244
+ sr, f0 = info["sample_rate"], info["if_f0"]
1245
+ version = "v2" if ("version" in info and info["version"] == "v2") else "v1"
1246
+ return sr, str(f0), version
1247
+ except:
1248
+ traceback.print_exc()
1249
+ return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
1250
+
1251
+
1252
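+ # Exports a checkpoint to ONNX with dummy inputs and dynamic time axes; fp32 only (see comment below).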
+ def export_onnx(ModelPath, ExportedPath):
1253
+ cpt = torch.load(ModelPath, map_location="cpu")
1254
+ cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
1255
+ vec_channels = 256 if cpt.get("version", "v1") == "v1" else 768
1256
+
1257
+ test_phone = torch.rand(1, 200, vec_channels) # hidden unit
1258
+ test_phone_lengths = torch.tensor([200]).long() # hidden unit length (seemingly unused)
1260
+ test_pitch = torch.randint(size=(1, 200), low=5, high=255) # coarse f0 (Hz)
1261
+ test_pitchf = torch.rand(1, 200) # NSF f0
1262
+ test_ds = torch.LongTensor([0]) # speaker ID
1263
+ test_rnd = torch.rand(1, 192, 200) # noise (adds a random factor)
1263
+
1264
+ device = "cpu" # 导出时设备(不影响使用模型)
1265
+
1266
+ net_g = SynthesizerTrnMsNSFsidM(
1267
+ *cpt["config"], is_half=False, version=cpt.get("version", "v1")
1268
+ ) # fp32 export (fp16 support in C++ would require manually re-laying out memory, so it is skipped for now)
1269
+ net_g.load_state_dict(cpt["weight"], strict=False)
1270
+ input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
1271
+ output_names = [
1272
+ "audio",
1273
+ ]
1274
+ # net_g.construct_spkmixmap(n_speaker)  # export with a multi-speaker mixing track
1275
+ torch.onnx.export(
1276
+ net_g,
1277
+ (
1278
+ test_phone.to(device),
1279
+ test_phone_lengths.to(device),
1280
+ test_pitch.to(device),
1281
+ test_pitchf.to(device),
1282
+ test_ds.to(device),
1283
+ test_rnd.to(device),
1284
+ ),
1285
+ ExportedPath,
1286
+ dynamic_axes={
1287
+ "phone": [1],
1288
+ "pitch": [1],
1289
+ "pitchf": [1],
1290
+ "rnd": [2],
1291
+ },
1292
+ do_constant_folding=False,
1293
+ opset_version=13,
1294
+ verbose=False,
1295
+ input_names=input_names,
1296
+ output_names=output_names,
1297
+ )
1298
+ return "Finished"
1299
+
1300
+
1301
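+ # Gradio UI: tabs for inference, vocal separation, training, ckpt tools, ONNX export, and FAQ.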
+ with gr.Blocks() as app:
1302
+ gr.Markdown(
1303
+ value=i18n(
1304
+ "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>使用需遵守的协议-LICENSE.txt</b>."
1305
+ )
1306
+ )
1307
+ with gr.Tabs():
1308
+ with gr.TabItem(i18n("模型推理")):
1309
+ with gr.Row():
1310
+ sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names))
1311
+ refresh_button = gr.Button(i18n("刷新音色列表和索引路径"), variant="primary")
1312
+ clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary")
1313
+ spk_item = gr.Slider(
1314
+ minimum=0,
1315
+ maximum=2333,
1316
+ step=1,
1317
+ label=i18n("请选择说话人id"),
1318
+ value=0,
1319
+ visible=False,
1320
+ interactive=True,
1321
+ )
1322
+ clean_button.click(fn=clean, inputs=[], outputs=[sid0])
1323
+ with gr.Group():
1324
+ gr.Markdown(
1325
+ value=i18n("男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ")
1326
+ )
1327
+ with gr.Row():
1328
+ with gr.Column():
1329
+ vc_transform0 = gr.Number(
1330
+ label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
1331
+ )
1332
+ input_audio0 = gr.Textbox(
1333
+ label=i18n("输入待处理音频文件路径(默认是正确格式示例)"),
1334
+ value="E:\\codes\\py39\\test-20230416b\\todo-songs\\冬之花clip1.wav",
1335
+ )
1336
+ f0method0 = gr.Radio(
1337
+ label=i18n(
1338
+ "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"
1339
+ ),
1340
+ choices=["pm", "harvest", "crepe"],
1341
+ value="pm",
1342
+ interactive=True,
1343
+ )
1344
+ filter_radius0 = gr.Slider(
1345
+ minimum=0,
1346
+ maximum=7,
1347
+ label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"),
1348
+ value=3,
1349
+ step=1,
1350
+ interactive=True,
1351
+ )
1352
+ with gr.Column():
1353
+ file_index1 = gr.Textbox(
1354
+ label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
1355
+ value="",
1356
+ interactive=True,
1357
+ )
1358
+ file_index2 = gr.Dropdown(
1359
+ label=i18n("自动检测index路径,下拉式选择(dropdown)"),
1360
+ choices=sorted(index_paths),
1361
+ interactive=True,
1362
+ )
1363
+ refresh_button.click(
1364
+ fn=change_choices, inputs=[], outputs=[sid0, file_index2]
1365
+ )
1366
+ # file_big_npy1 = gr.Textbox(
1367
+ # label=i18n("特征文件路径"),
1368
+ # value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
1369
+ # interactive=True,
1370
+ # )
1371
+ index_rate1 = gr.Slider(
1372
+ minimum=0,
1373
+ maximum=1,
1374
+ label=i18n("检索特征占比"),
1375
+ value=0.75,
1376
+ interactive=True,
1377
+ )
1378
+ with gr.Column():
1379
+ resample_sr0 = gr.Slider(
1380
+ minimum=0,
1381
+ maximum=48000,
1382
+ label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
1383
+ value=0,
1384
+ step=1,
1385
+ interactive=True,
1386
+ )
1387
+ rms_mix_rate0 = gr.Slider(
1388
+ minimum=0,
1389
+ maximum=1,
1390
+ label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
1391
+ value=0.25,
1392
+ interactive=True,
1393
+ )
1394
+ protect0 = gr.Slider(
1395
+ minimum=0,
1396
+ maximum=0.5,
1397
+ label=i18n(
1398
+ "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"
1399
+ ),
1400
+ value=0.33,
1401
+ step=0.01,
1402
+ interactive=True,
1403
+ )
1404
+ f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"))
1405
+ but0 = gr.Button(i18n("转换"), variant="primary")
1406
+ with gr.Row():
1407
+ vc_output1 = gr.Textbox(label=i18n("输出信息"))
1408
+ vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)"))
1409
+ but0.click(
1410
+ vc_single,
1411
+ [
1412
+ spk_item,
1413
+ input_audio0,
1414
+ vc_transform0,
1415
+ f0_file,
1416
+ f0method0,
1417
+ file_index1,
1418
+ file_index2,
1419
+ # file_big_npy1,
1420
+ index_rate1,
1421
+ filter_radius0,
1422
+ resample_sr0,
1423
+ rms_mix_rate0,
1424
+ protect0,
1425
+ ],
1426
+ [vc_output1, vc_output2],
1427
+ )
1428
+ with gr.Group():
1429
+ gr.Markdown(
1430
+ value=i18n("批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. ")
1431
+ )
1432
+ with gr.Row():
1433
+ with gr.Column():
1434
+ vc_transform1 = gr.Number(
1435
+ label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
1436
+ )
1437
+ opt_input = gr.Textbox(label=i18n("指定输出文件夹"), value="opt")
1438
+ f0method1 = gr.Radio(
1439
+ label=i18n(
1440
+ "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"
1441
+ ),
1442
+ choices=["pm", "harvest", "crepe"],
1443
+ value="pm",
1444
+ interactive=True,
1445
+ )
1446
+ filter_radius1 = gr.Slider(
1447
+ minimum=0,
1448
+ maximum=7,
1449
+ label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"),
1450
+ value=3,
1451
+ step=1,
1452
+ interactive=True,
1453
+ )
1454
+ with gr.Column():
1455
+ file_index3 = gr.Textbox(
1456
+ label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
1457
+ value="",
1458
+ interactive=True,
1459
+ )
1460
+ file_index4 = gr.Dropdown(
1461
+ label=i18n("自动检测index路径,下拉式选择(dropdown)"),
1462
+ choices=sorted(index_paths),
1463
+ interactive=True,
1464
+ )
1465
+ refresh_button.click(
1466
+ fn=lambda: change_choices()[1],
1467
+ inputs=[],
1468
+ outputs=file_index4,
1469
+ )
1470
+ # file_big_npy2 = gr.Textbox(
1471
+ # label=i18n("特征文件路径"),
1472
+ # value="E:\\codes\\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
1473
+ # interactive=True,
1474
+ # )
1475
+ index_rate2 = gr.Slider(
1476
+ minimum=0,
1477
+ maximum=1,
1478
+ label=i18n("检索特征占比"),
1479
+ value=1,
1480
+ interactive=True,
1481
+ )
1482
+ with gr.Column():
1483
+ resample_sr1 = gr.Slider(
1484
+ minimum=0,
1485
+ maximum=48000,
1486
+ label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
1487
+ value=0,
1488
+ step=1,
1489
+ interactive=True,
1490
+ )
1491
+ rms_mix_rate1 = gr.Slider(
1492
+ minimum=0,
1493
+ maximum=1,
1494
+ label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
1495
+ value=1,
1496
+ interactive=True,
1497
+ )
1498
+ protect1 = gr.Slider(
1499
+ minimum=0,
1500
+ maximum=0.5,
1501
+ label=i18n(
1502
+ "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"
1503
+ ),
1504
+ value=0.33,
1505
+ step=0.01,
1506
+ interactive=True,
1507
+ )
1508
+ with gr.Column():
1509
+ dir_input = gr.Textbox(
1510
+ label=i18n("输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"),
1511
+ value="E:\codes\py39\\test-20230416b\\todo-songs",
1512
+ )
1513
+ inputs = gr.File(
1514
+ file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
1515
+ )
1516
+ with gr.Row():
1517
+ format1 = gr.Radio(
1518
+ label=i18n("导出文件格式"),
1519
+ choices=["wav", "flac", "mp3", "m4a"],
1520
+ value="flac",
1521
+ interactive=True,
1522
+ )
1523
+ but1 = gr.Button(i18n("转换"), variant="primary")
1524
+ vc_output3 = gr.Textbox(label=i18n("输出信息"))
1525
+ but1.click(
1526
+ vc_multi,
1527
+ [
1528
+ spk_item,
1529
+ dir_input,
1530
+ opt_input,
1531
+ inputs,
1532
+ vc_transform1,
1533
+ f0method1,
1534
+ file_index3,
1535
+ file_index4,
1536
+ # file_big_npy2,
1537
+ index_rate2,
1538
+ filter_radius1,
1539
+ resample_sr1,
1540
+ rms_mix_rate1,
1541
+ protect1,
1542
+ format1,
1543
+ ],
1544
+ [vc_output3],
1545
+ )
1546
+ sid0.change(
1547
+ fn=get_vc,
1548
+ inputs=[sid0, protect0, protect1],
1549
+ outputs=[spk_item, protect0, protect1],
1550
+ )
1551
+ with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")):
1552
+ with gr.Group():
1553
+ gr.Markdown(
1554
+ value=i18n(
1555
+ "人声伴奏分离批量处理, 使用UVR5模型。 <br>"
1556
+ "合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。 <br>"
1557
+ "模型分为三类: <br>"
1558
+ "1、保留人声:不带和声的音频选这个,对主人声保留比HP5更好。内置HP2和HP3两个模型,HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点; <br>"
1559
+ "2、仅保留主人声:带和声的音频选这个,对主人声可能有削弱。内置HP5一个模型; <br> "
1560
+ "3、去混响、去延迟模型(by FoxJoy):<br>"
1561
+ "  (1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响;<br>"
1562
+ "&emsp;(234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底,DeReverb额外去除混响,可去除单声道混响,但是对高频重的板式混响去不干净。<br>"
1563
+ "去混响/去延迟,附:<br>"
1564
+ "1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍;<br>"
1565
+ "2、MDX-Net-Dereverb模型挺慢的;<br>"
1566
+ "3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。"
1567
+ )
1568
+ )
1569
+ with gr.Row():
1570
+ with gr.Column():
1571
+ dir_wav_input = gr.Textbox(
1572
+ label=i18n("输入待处理音频文件夹路径"),
1573
+ value="E:\\codes\\py39\\test-20230416b\\todo-songs\\todo-songs",
1574
+ )
1575
+ wav_inputs = gr.File(
1576
+ file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
1577
+ )
1578
+ with gr.Column():
1579
+ model_choose = gr.Dropdown(label=i18n("模型"), choices=uvr5_names)
1580
+ agg = gr.Slider(
1581
+ minimum=0,
1582
+ maximum=20,
1583
+ step=1,
1584
+ label="人声提取激进程度",
1585
+ value=10,
1586
+ interactive=True,
1587
+ visible=False, # not exposed for adjustment yet
1588
+ )
1589
+ opt_vocal_root = gr.Textbox(
1590
+ label=i18n("指定输出主人声文件夹"), value="opt"
1591
+ )
1592
+ opt_ins_root = gr.Textbox(
1593
+ label=i18n("指定输出非主人声文件夹"), value="opt"
1594
+ )
1595
+ format0 = gr.Radio(
1596
+ label=i18n("导出文件格式"),
1597
+ choices=["wav", "flac", "mp3", "m4a"],
1598
+ value="flac",
1599
+ interactive=True,
1600
+ )
1601
+ but2 = gr.Button(i18n("转换"), variant="primary")
1602
+ vc_output4 = gr.Textbox(label=i18n("输出信息"))
1603
+ but2.click(
1604
+ uvr,
1605
+ [
1606
+ model_choose,
1607
+ dir_wav_input,
1608
+ opt_vocal_root,
1609
+ wav_inputs,
1610
+ opt_ins_root,
1611
+ agg,
1612
+ format0,
1613
+ ],
1614
+ [vc_output4],
1615
+ )
1616
+ with gr.TabItem(i18n("训练")):
1617
+ gr.Markdown(
1618
+ value=i18n(
1619
+ "step1: 填写实验配置. 实验数据放在logs下, 每个实验一个文件夹, 需手工输入实验名路径, 内含实验配置, 日志, 训练得到的模型文件. "
1620
+ )
1621
+ )
1622
+ with gr.Row():
1623
+ exp_dir1 = gr.Textbox(label=i18n("输入实验名"), value="mi-test")
1624
+ sr2 = gr.Radio(
1625
+ label=i18n("目标采样率"),
1626
+ choices=["40k", "48k"],
1627
+ value="40k",
1628
+ interactive=True,
1629
+ )
1630
+ if_f0_3 = gr.Radio(
1631
+ label=i18n("模型是否带音高指导(唱歌一定要, 语音可以不要)"),
1632
+ choices=[True, False],
1633
+ value=True,
1634
+ interactive=True,
1635
+ )
1636
+ version19 = gr.Radio(
1637
+ label=i18n("版本"),
1638
+ choices=["v1", "v2"],
1639
+ value="v1",
1640
+ interactive=True,
1641
+ visible=True,
1642
+ )
1643
+ np7 = gr.Slider(
1644
+ minimum=0,
1645
+ maximum=config.n_cpu,
1646
+ step=1,
1647
+ label=i18n("提取音高和处理数据使用的CPU进程数"),
1648
+ value=int(np.ceil(config.n_cpu / 1.5)),
1649
+ interactive=True,
1650
+ )
1651
+ with gr.Group(): # single-speaker for now; support for up to 4 speakers planned # data processing
1652
+ gr.Markdown(
1653
+ value=i18n(
1654
+ "step2a: 自动遍历训练文件夹下所有可解码成音频的文件并进行切片归一化, 在实验目录下生成2个wav文件夹; 暂时只支持单人训练. "
1655
+ )
1656
+ )
1657
+ with gr.Row():
1658
+ trainset_dir4 = gr.Textbox(
1659
+ label=i18n("输入训练文件夹路径"), value="E:\\语音音频+标注\\米津玄师\\src"
1660
+ )
1661
+ spk_id5 = gr.Slider(
1662
+ minimum=0,
1663
+ maximum=4,
1664
+ step=1,
1665
+ label=i18n("请指定说话人id"),
1666
+ value=0,
1667
+ interactive=True,
1668
+ )
1669
+ but1 = gr.Button(i18n("处理数据"), variant="primary")
1670
+ info1 = gr.Textbox(label=i18n("输出信息"), value="")
1671
+ but1.click(
1672
+ preprocess_dataset, [trainset_dir4, exp_dir1, sr2, np7], [info1]
1673
+ )
1674
+ with gr.Group():
1675
+ gr.Markdown(value=i18n("step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)"))
1676
+ with gr.Row():
1677
+ with gr.Column():
1678
+ gpus6 = gr.Textbox(
1679
+ label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"),
1680
+ value=gpus,
1681
+ interactive=True,
1682
+ )
1683
+ gpu_info9 = gr.Textbox(label=i18n("显卡信息"), value=gpu_info)
1684
+ with gr.Column():
1685
+ f0method8 = gr.Radio(
1686
+ label=i18n(
1687
+ "选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢"
1688
+ ),
1689
+ choices=["pm", "harvest", "dio"],
1690
+ value="harvest",
1691
+ interactive=True,
1692
+ )
1693
+ but2 = gr.Button(i18n("特征提取"), variant="primary")
1694
+ info2 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
1695
+ but2.click(
1696
+ extract_f0_feature,
1697
+ [gpus6, np7, f0method8, if_f0_3, exp_dir1, version19],
1698
+ [info2],
1699
+ )
1700
+ with gr.Group():
1701
+ gr.Markdown(value=i18n("step3: 填写训练设置, 开始训练模型和索引"))
1702
+ with gr.Row():
1703
+ save_epoch10 = gr.Slider(
1704
+ minimum=0,
1705
+ maximum=50,
1706
+ step=1,
1707
+ label=i18n("保存频率save_every_epoch"),
1708
+ value=5,
1709
+ interactive=True,
1710
+ )
1711
+ total_epoch11 = gr.Slider(
1712
+ minimum=0,
1713
+ maximum=1000,
1714
+ step=1,
1715
+ label=i18n("总训练轮数total_epoch"),
1716
+ value=20,
1717
+ interactive=True,
1718
+ )
1719
+ batch_size12 = gr.Slider(
1720
+ minimum=1,
1721
+ maximum=40,
1722
+ step=1,
1723
+ label=i18n("每张显卡的batch_size"),
1724
+ value=default_batch_size,
1725
+ interactive=True,
1726
+ )
1727
+ if_save_latest13 = gr.Radio(
1728
+ label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"),
1729
+ choices=[i18n("是"), i18n("否")],
1730
+ value=i18n("否"),
1731
+ interactive=True,
1732
+ )
1733
+ if_cache_gpu17 = gr.Radio(
1734
+ label=i18n(
1735
+ "是否缓存所有训练集至显存. 10min以下小数据可缓存以加速训练, 大数据缓存会炸显存也加不了多少速"
1736
+ ),
1737
+ choices=[i18n("是"), i18n("否")],
1738
+ value=i18n("否"),
1739
+ interactive=True,
1740
+ )
1741
+ if_save_every_weights18 = gr.Radio(
1742
+ label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"),
1743
+ choices=[i18n("是"), i18n("否")],
1744
+ value=i18n("否"),
1745
+ interactive=True,
1746
+ )
1747
+ with gr.Row():
1748
+ pretrained_G14 = gr.Textbox(
1749
+ label=i18n("加载预训练底模G路径"),
1750
+ value="pretrained/f0G40k.pth",
1751
+ interactive=True,
1752
+ )
1753
+ pretrained_D15 = gr.Textbox(
1754
+ label=i18n("加载预训练底模D路径"),
1755
+ value="pretrained/f0D40k.pth",
1756
+ interactive=True,
1757
+ )
1758
+ sr2.change(
1759
+ change_sr2,
1760
+ [sr2, if_f0_3, version19],
1761
+ [pretrained_G14, pretrained_D15],
1762
+ )
1763
+ version19.change(
1764
+ change_version19,
1765
+ [sr2, if_f0_3, version19],
1766
+ [pretrained_G14, pretrained_D15, sr2],
1767
+ )
1768
+ if_f0_3.change(
1769
+ change_f0,
1770
+ [if_f0_3, sr2, version19],
1771
+ [f0method8, pretrained_G14, pretrained_D15],
1772
+ )
1773
+ gpus16 = gr.Textbox(
1774
+ label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"),
1775
+ value=gpus,
1776
+ interactive=True,
1777
+ )
1778
+ but3 = gr.Button(i18n("训练模型"), variant="primary")
1779
+ but4 = gr.Button(i18n("训练特征索引"), variant="primary")
1780
+ but5 = gr.Button(i18n("一键训练"), variant="primary")
1781
+ info3 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=10)
1782
+ but3.click(
1783
+ click_train,
1784
+ [
1785
+ exp_dir1,
1786
+ sr2,
1787
+ if_f0_3,
1788
+ spk_id5,
1789
+ save_epoch10,
1790
+ total_epoch11,
1791
+ batch_size12,
1792
+ if_save_latest13,
1793
+ pretrained_G14,
1794
+ pretrained_D15,
1795
+ gpus16,
1796
+ if_cache_gpu17,
1797
+ if_save_every_weights18,
1798
+ version19,
1799
+ ],
1800
+ info3,
1801
+ )
1802
+ but4.click(train_index, [exp_dir1, version19], info3)
1803
+ but5.click(
1804
+ train1key,
1805
+ [
1806
+ exp_dir1,
1807
+ sr2,
1808
+ if_f0_3,
1809
+ trainset_dir4,
1810
+ spk_id5,
1811
+ np7,
1812
+ f0method8,
1813
+ save_epoch10,
1814
+ total_epoch11,
1815
+ batch_size12,
1816
+ if_save_latest13,
1817
+ pretrained_G14,
1818
+ pretrained_D15,
1819
+ gpus16,
1820
+ if_cache_gpu17,
1821
+ if_save_every_weights18,
1822
+ version19,
1823
+ ],
1824
+ info3,
1825
+ )
1826
+
1827
+ with gr.TabItem(i18n("ckpt处理")):
1828
+ with gr.Group():
1829
+ gr.Markdown(value=i18n("模型融合, 可用于测试音色融合"))
1830
+ with gr.Row():
1831
+ ckpt_a = gr.Textbox(label=i18n("A模型路径"), value="", interactive=True)
1832
+ ckpt_b = gr.Textbox(label=i18n("B模型路径"), value="", interactive=True)
1833
+ alpha_a = gr.Slider(
1834
+ minimum=0,
1835
+ maximum=1,
1836
+ label=i18n("A模型权重"),
1837
+ value=0.5,
1838
+ interactive=True,
1839
+ )
1840
+ with gr.Row():
1841
+ sr_ = gr.Radio(
1842
+ label=i18n("目标采样率"),
1843
+ choices=["40k", "48k"],
1844
+ value="40k",
1845
+ interactive=True,
1846
+ )
1847
+ if_f0_ = gr.Radio(
1848
+ label=i18n("模型是否带音高指导"),
1849
+ choices=[i18n("是"), i18n("否")],
1850
+ value=i18n("是"),
1851
+ interactive=True,
1852
+ )
1853
+ info__ = gr.Textbox(
1854
+ label=i18n("要置入的模型信息"), value="", max_lines=8, interactive=True
1855
+ )
1856
+ name_to_save0 = gr.Textbox(
1857
+ label=i18n("保存的模型名不带后缀"),
1858
+ value="",
1859
+ max_lines=1,
1860
+ interactive=True,
1861
+ )
1862
+ version_2 = gr.Radio(
1863
+ label=i18n("模型版本型号"),
1864
+ choices=["v1", "v2"],
1865
+ value="v1",
1866
+ interactive=True,
1867
+ )
1868
+ with gr.Row():
1869
+ but6 = gr.Button(i18n("融合"), variant="primary")
1870
+ info4 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
1871
+ but6.click(
1872
+ merge,
1873
+ [
1874
+ ckpt_a,
1875
+ ckpt_b,
1876
+ alpha_a,
1877
+ sr_,
1878
+ if_f0_,
1879
+ info__,
1880
+ name_to_save0,
1881
+ version_2,
1882
+ ],
1883
+ info4,
1884
+ ) # def merge(path1,path2,alpha1,sr,f0,info):
1885
+ with gr.Group():
1886
+ gr.Markdown(value=i18n("修改模型信息(仅支持weights文件夹下提取的小模型文件)"))
1887
+ with gr.Row():
1888
+ ckpt_path0 = gr.Textbox(
1889
+ label=i18n("模型路径"), value="", interactive=True
1890
+ )
1891
+ info_ = gr.Textbox(
1892
+ label=i18n("要改的模型信息"), value="", max_lines=8, interactive=True
1893
+ )
1894
+ name_to_save1 = gr.Textbox(
1895
+ label=i18n("保存的文件名, 默认空为和源文件同名"),
1896
+ value="",
1897
+ max_lines=8,
1898
+ interactive=True,
1899
+ )
1900
+ with gr.Row():
1901
+ but7 = gr.Button(i18n("修改"), variant="primary")
1902
+ info5 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
1903
+ but7.click(change_info, [ckpt_path0, info_, name_to_save1], info5)
1904
+ with gr.Group():
1905
+ gr.Markdown(value=i18n("查看模型信息(仅支持weights文件夹下提取的小模型文件)"))
1906
+ with gr.Row():
1907
+ ckpt_path1 = gr.Textbox(
1908
+ label=i18n("模型路径"), value="", interactive=True
1909
+ )
1910
+ but8 = gr.Button(i18n("查看"), variant="primary")
1911
+ info6 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
1912
+ but8.click(show_info, [ckpt_path1], info6)
1913
+ with gr.Group():
1914
+ gr.Markdown(
1915
+ value=i18n(
1916
+ "模型提取(输入logs文件夹下大文件模型路径),适用于训一半不想训了模型没有自动提取保存小文件模型,或者想测试中间模型的情况"
1917
+ )
1918
+ )
1919
+ with gr.Row():
1920
+ ckpt_path2 = gr.Textbox(
1921
+ label=i18n("模型路径"),
1922
+ value="E:\\codes\\py39\\logs\\mi-test_f0_48k\\G_23333.pth",
1923
+ interactive=True,
1924
+ )
1925
+ save_name = gr.Textbox(
1926
+ label=i18n("保存名"), value="", interactive=True
1927
+ )
1928
+ sr__ = gr.Radio(
1929
+ label=i18n("目标采样率"),
1930
+ choices=["32k", "40k", "48k"],
1931
+ value="40k",
1932
+ interactive=True,
1933
+ )
1934
+ if_f0__ = gr.Radio(
1935
+ label=i18n("模型是否带音高指导,1是0否"),
1936
+ choices=["1", "0"],
1937
+ value="1",
1938
+ interactive=True,
1939
+ )
1940
+ version_1 = gr.Radio(
1941
+ label=i18n("模型版本型号"),
1942
+ choices=["v1", "v2"],
1943
+ value="v2",
1944
+ interactive=True,
1945
+ )
1946
+ info___ = gr.Textbox(
1947
+ label=i18n("要置入的模型信息"), value="", max_lines=8, interactive=True
1948
+ )
1949
+ but9 = gr.Button(i18n("提取"), variant="primary")
1950
+ info7 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
1951
+ ckpt_path2.change(
1952
+ change_info_, [ckpt_path2], [sr__, if_f0__, version_1]
1953
+ )
1954
+ but9.click(
1955
+ extract_small_model,
1956
+ [ckpt_path2, save_name, sr__, if_f0__, info___, version_1],
1957
+ info7,
1958
+ )
1959
+
1960
+ with gr.TabItem(i18n("Onnx导出")):
1961
+ with gr.Row():
1962
+ ckpt_dir = gr.Textbox(label=i18n("RVC模型路径"), value="", interactive=True)
1963
+ with gr.Row():
1964
+ onnx_dir = gr.Textbox(
1965
+ label=i18n("Onnx输出路径"), value="", interactive=True
1966
+ )
1967
+ with gr.Row():
1968
+ infoOnnx = gr.Label(label="info")
1969
+ with gr.Row():
1970
+ butOnnx = gr.Button(i18n("导出Onnx模型"), variant="primary")
1971
+ butOnnx.click(export_onnx, [ckpt_dir, onnx_dir], infoOnnx)
1972
+
1973
+ tab_faq = i18n("常见问题解答")
1974
+ with gr.TabItem(tab_faq):
1975
+ try:
1976
+ if tab_faq == "常见问题解答":
1977
+ with open("docs/faq.md", "r", encoding="utf8") as f:
1978
+ info = f.read()
1979
+ else:
1980
+ with open("docs/faq_en.md", "r", encoding="utf8") as f:
1981
+ info = f.read()
1982
+ gr.Markdown(value=info)
1983
+ except:
1984
+ gr.Markdown(traceback.format_exc())
1985
+
1986
+ # with gr.TabItem(i18n("招募音高曲线前端编辑器")):
1987
+ # gr.Markdown(value=i18n("加开发群联系我xxxxx"))
1988
+ # with gr.TabItem(i18n("点击查看交流、问题反馈群号")):
1989
+ # gr.Markdown(value=i18n("xxxxx"))
1990
+
1991
+ if config.iscolab:
1992
+ app.queue(concurrency_count=511, max_size=1022).launch(share=True)
1993
+ else:
1994
+ app.queue(concurrency_count=511, max_size=1022).launch(
1995
+ server_name="0.0.0.0",
1996
+ inbrowser=not config.noautoopen,
1997
+ server_port=config.listen_port,
1998
+ quiet=True,
1999
+ )
infer_batch_rvc.py ADDED
@@ -0,0 +1,220 @@
1
+ """
2
+ v1
3
+ runtime\python.exe myinfer-v2-0528.py 0 "E:\codes\py39\RVC-beta\todo-songs" "E:\codes\py39\logs\mi-test\added_IVF677_Flat_nprobe_7.index" harvest "E:\codes\py39\RVC-beta\output" "E:\codes\py39\test-20230416b\weights\mi-test.pth" 0.66 cuda:0 True 3 0 1 0.33
4
+ v2
5
+ runtime\python.exe myinfer-v2-0528.py 0 "E:\codes\py39\RVC-beta\todo-songs" "E:\codes\py39\test-20230416b\logs\mi-test-v2\aadded_IVF677_Flat_nprobe_1_v2.index" harvest "E:\codes\py39\RVC-beta\output_v2" "E:\codes\py39\test-20230416b\weights\mi-test-v2.pth" 0.66 cuda:0 True 3 0 1 0.33
6
+ """
7
+ import os, sys, pdb, torch
8
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
9
+ now_dir = os.getcwd()
10
+ sys.path.append(now_dir)
11
+ import argparse
12
+ import glob
13
+ import sys
14
+ import torch
15
+ import tqdm as tq
16
+ from multiprocessing import cpu_count
17
+
18
+
19
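+ # Minimal runtime config: detects device/precision and derives padding/window sizes from available VRAM.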
+ class Config:
20
+ def __init__(self, device, is_half):
21
+ self.device = device
22
+ self.is_half = is_half
23
+ self.n_cpu = 0
24
+ self.gpu_name = None
25
+ self.gpu_mem = None
26
+ self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
27
+
28
+ def device_config(self) -> tuple:
29
+ if torch.cuda.is_available():
30
+ i_device = int(self.device.split(":")[-1])
31
+ self.gpu_name = torch.cuda.get_device_name(i_device)
32
+ if (
33
+ ("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
34
+ or "P40" in self.gpu_name.upper()
35
+ or "1060" in self.gpu_name
36
+ or "1070" in self.gpu_name
37
+ or "1080" in self.gpu_name
38
+ ):
39
+ print("16系/10系显卡和P40强制单精度")
40
+ self.is_half = False
41
+ for config_file in ["32k.json", "40k.json", "48k.json"]:
42
+ with open(f"configs/{config_file}", "r") as f:
43
+ strr = f.read().replace("true", "false")
44
+ with open(f"configs/{config_file}", "w") as f:
45
+ f.write(strr)
46
+ with open("trainset_preprocess_pipeline_print.py", "r") as f:
47
+ strr = f.read().replace("3.7", "3.0")
48
+ with open("trainset_preprocess_pipeline_print.py", "w") as f:
49
+ f.write(strr)
50
+ else:
51
+ self.gpu_name = None
52
+ self.gpu_mem = int(
53
+ torch.cuda.get_device_properties(i_device).total_memory
54
+ / 1024
55
+ / 1024
56
+ / 1024
57
+ + 0.4
58
+ )
59
+ if self.gpu_mem <= 4:
60
+ with open("trainset_preprocess_pipeline_print.py", "r") as f:
61
+ strr = f.read().replace("3.7", "3.0")
62
+ with open("trainset_preprocess_pipeline_print.py", "w") as f:
63
+ f.write(strr)
64
+ elif torch.backends.mps.is_available():
65
+ print("没有发现支持的N卡, 使用MPS进行推理")
66
+ self.device = "mps"
67
+ else:
68
+ print("没有发现支持的N卡, 使用CPU进行推理")
69
+ self.device = "cpu"
70
+ self.is_half = False # half precision is not usable on CPU
71
+
72
+ if self.n_cpu == 0:
73
+ self.n_cpu = cpu_count()
74
+
75
+ if self.is_half:
76
+ # settings for ~6 GB VRAM
77
+ x_pad = 3
78
+ x_query = 10
79
+ x_center = 60
80
+ x_max = 65
81
+ else:
82
+ # settings for ~5 GB VRAM
83
+ x_pad = 1
84
+ x_query = 6
85
+ x_center = 38
86
+ x_max = 41
87
+
88
+ if self.gpu_mem is not None and self.gpu_mem <= 4:
89
+ x_pad = 1
90
+ x_query = 5
91
+ x_center = 30
92
+ x_max = 32
93
+
94
+ return x_pad, x_query, x_center, x_max
95
+
96
+
97
+ f0up_key = sys.argv[1] # transpose in semitones, e.g. -12.0
98
+ input_path = sys.argv[2]
99
+ index_path = "" # index lookup disabled: the index path from argv[3] is captured below but never used
100
+ abc = sys.argv[3]
101
+ f0method = sys.argv[4] # harvest or pm
102
+ opt_path = sys.argv[5]
103
+ model_path = sys.argv[6]
104
+ index_rate = float(sys.argv[7])
105
+ device = sys.argv[8]
106
+ device = None # hard-coded override: the CLI device argument is ignored
107
+ is_half = bool(sys.argv[9]) # note: bool() of any non-empty string is True
108
+ is_half = None # hard-coded override: the CLI is_half argument is ignored
109
+ filter_radius = int(sys.argv[10])
110
+ resample_sr = int(sys.argv[11])
111
+ rms_mix_rate = float(sys.argv[12])
112
+ protect = float(sys.argv[13])
113
+ print(sys.argv)
114
+ config = Config(device, is_half)
115
+ now_dir = os.getcwd()
116
+ sys.path.append(now_dir)
117
+ from vc_infer_pipeline import VC
118
+ from infer_pack.models import (
119
+ SynthesizerTrnMs256NSFsid,
120
+ SynthesizerTrnMs256NSFsid_nono,
121
+ SynthesizerTrnMs768NSFsid,
122
+ SynthesizerTrnMs768NSFsid_nono,
123
+ )
124
+ from my_utils import load_audio
125
+ from fairseq import checkpoint_utils
126
+ from scipy.io import wavfile
127
+
128
+ hubert_model = None
129
+
130
+
131
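+ # Lazily loads the HuBERT content encoder from hubert_base.pt the first time it is needed.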
+ def load_hubert():
132
+ global hubert_model
133
+ models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
134
+ ["hubert_base.pt"],
135
+ suffix="",
136
+ )
137
+ hubert_model = models[0]
138
+ hubert_model = hubert_model.to(device)
139
+ if is_half:
140
+ hubert_model = hubert_model.half()
141
+ else:
142
+ hubert_model = hubert_model.float()
143
+ hubert_model.eval()
144
+
145
+
146
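+ # Single-file conversion: loads audio at 16 kHz and runs the full VC pipeline with the globals set above.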
+ def vc_single(sid, input_audio, f0_up_key, f0_file, f0_method, file_index, index_rate):
147
+ global tgt_sr, net_g, vc, hubert_model, version
148
+ if input_audio is None:
149
+ return "You need to upload an audio", None
150
+ f0_up_key = int(f0_up_key)
151
+ audio = load_audio(input_audio, 16000)
152
+ times = [0, 0, 0]
153
+ if hubert_model is None:
154
+ load_hubert()
155
+ if_f0 = cpt.get("f0", 1)
156
+ # audio_opt=vc.pipeline(hubert_model,net_g,sid,audio,times,f0_up_key,f0_method,file_index,file_big_npy,index_rate,if_f0,f0_file=f0_file)
157
+ audio_opt = vc.pipeline(
158
+ hubert_model,
159
+ net_g,
160
+ sid,
161
+ audio,
162
+ input_audio,
163
+ times,
164
+ f0_up_key,
165
+ f0_method,
166
+ file_index,
167
+ index_rate,
168
+ if_f0,
169
+ filter_radius,
170
+ tgt_sr,
171
+ resample_sr,
172
+ rms_mix_rate,
173
+ version,
174
+ protect,
175
+ f0_file=f0_file,
176
+ )
177
+ print(times)
178
+ return audio_opt
179
+
180
+
181
+ def get_vc(model_path):
182
+ global n_spk, tgt_sr, net_g, vc, cpt, device, is_half, version
183
+ print("loading pth %s" % model_path)
184
+ cpt = torch.load(model_path, map_location="cpu")
185
+ tgt_sr = cpt["config"][-1]
186
+ cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
187
+ if_f0 = cpt.get("f0", 1)
188
+ version = cpt.get("version", "v1")
189
+ if version == "v1":
190
+ if if_f0 == 1:
191
+ net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half)
192
+ else:
193
+ net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
194
+ elif version == "v2":
195
+ if if_f0 == 1: #
196
+ net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=is_half)
197
+ else:
198
+ net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
199
+ del net_g.enc_q
200
+ print(net_g.load_state_dict(cpt["weight"], strict=False)) # oddly, state is not cleaned up properly without this line
201
+ net_g.eval().to(device)
202
+ if is_half:
203
+ net_g = net_g.half()
204
+ else:
205
+ net_g = net_g.float()
206
+ vc = VC(tgt_sr, config)
207
+ n_spk = cpt["config"][-3]
208
+ # return {"visible": True,"maximum": n_spk, "__type__": "update"}
209
+
210
+
211
+ get_vc(model_path)
212
+ audios = os.listdir(input_path)
213
+ for file in tq.tqdm(audios):
214
+ if file.endswith(".wav"):
215
+ file_path = input_path + "/" + file
216
+ wav_opt = vc_single(
217
+ 0, file_path, f0up_key, None, f0method, index_path, index_rate
218
+ )
219
+ out_path = opt_path + "/" + file
220
+ wavfile.write(out_path, tgt_sr, wav_opt)
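
For reference, a hypothetical invocation of the batch script above (the script file name is an assumption; the thirteen positional arguments map to sys.argv[1] through sys.argv[13] in the order parsed above; note that the device and is_half values are read but then overridden to None):

python infer_batch.py 0 ./audios none harvest ./results ./weights/model.pth 0.66 cuda:0 True 3 0 1 0.33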
infer_uvr5.py ADDED
@@ -0,0 +1,364 @@
+ import os, sys, torch, warnings, pdb
+
+ now_dir = os.getcwd()
+ sys.path.append(now_dir)
+ from json import load as ll
+
+ warnings.filterwarnings("ignore")
+ import librosa
+ import importlib
+ import numpy as np
+ import hashlib, math
+ from tqdm import tqdm
+ from uvr5_pack.lib_v5 import spec_utils
+ from uvr5_pack.utils import _get_name_params, inference
+ from uvr5_pack.lib_v5.model_param_init import ModelParameters
+ import soundfile as sf
+ from uvr5_pack.lib_v5.nets_new import CascadedNet
+ from uvr5_pack.lib_v5 import nets_61968KB as nets
+
+
+ class _audio_pre_:
+     def __init__(self, agg, model_path, device, is_half):
+         self.model_path = model_path
+         self.device = device
+         self.data = {
+             # Processing Options
+             "postprocess": False,
+             "tta": False,
+             # Constants
+             "window_size": 512,
+             "agg": agg,
+             "high_end_process": "mirroring",
+         }
+         mp = ModelParameters("uvr5_pack/lib_v5/modelparams/4band_v2.json")
+         model = nets.CascadedASPPNet(mp.param["bins"] * 2)
+         cpk = torch.load(model_path, map_location="cpu")
+         model.load_state_dict(cpk)
+         model.eval()
+         if is_half:
+             model = model.half().to(device)
+         else:
+             model = model.to(device)
+
+         self.mp = mp
+         self.model = model
+
+     def _path_audio_(self, music_file, ins_root=None, vocal_root=None, format="flac"):
+         if ins_root is None and vocal_root is None:
+             return "No save root."
+
+         if ins_root is not None:
+             os.makedirs(ins_root, exist_ok=True)
+         if vocal_root is not None:
+             os.makedirs(vocal_root, exist_ok=True)
+         X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {}
+         bands_n = len(self.mp.param["band"])
+         # print(bands_n)
+         for d in range(bands_n, 0, -1):
+             bp = self.mp.param["band"][d]
+             if d == bands_n:  # high-end band
+                 (
+                     X_wave[d],
+                     _,
+                 ) = librosa.core.load(
+                     music_file,
+                     bp["sr"],
+                     False,
+                     dtype=np.float32,
+                     res_type=bp["res_type"],
+                 )
+                 if X_wave[d].ndim == 1:
+                     X_wave[d] = np.asfortranarray([X_wave[d], X_wave[d]])
+             else:  # lower bands
+                 X_wave[d] = librosa.core.resample(
+                     X_wave[d + 1],
+                     self.mp.param["band"][d + 1]["sr"],
+                     bp["sr"],
+                     res_type=bp["res_type"],
+                 )
+             # Stft of wave source
+             X_spec_s[d] = spec_utils.wave_to_spectrogram_mt(
+                 X_wave[d],
+                 bp["hl"],
+                 bp["n_fft"],
+                 self.mp.param["mid_side"],
+                 self.mp.param["mid_side_b2"],
+                 self.mp.param["reverse"],
+             )
+             # pdb.set_trace()
+             if d == bands_n and self.data["high_end_process"] != "none":
+                 input_high_end_h = (bp["n_fft"] // 2 - bp["crop_stop"]) + (
+                     self.mp.param["pre_filter_stop"] - self.mp.param["pre_filter_start"]
+                 )
+                 input_high_end = X_spec_s[d][
+                     :, bp["n_fft"] // 2 - input_high_end_h : bp["n_fft"] // 2, :
+                 ]
+
+         X_spec_m = spec_utils.combine_spectrograms(X_spec_s, self.mp)
+         aggressive_set = float(self.data["agg"] / 100)
+         aggressiveness = {
+             "value": aggressive_set,
+             "split_bin": self.mp.param["band"][1]["crop_stop"],
+         }
+         with torch.no_grad():
+             pred, X_mag, X_phase = inference(
+                 X_spec_m, self.device, self.model, aggressiveness, self.data
+             )
+         # Postprocess
+         if self.data["postprocess"]:
+             pred_inv = np.clip(X_mag - pred, 0, np.inf)
+             pred = spec_utils.mask_silence(pred, pred_inv)
+         y_spec_m = pred * X_phase
+         v_spec_m = X_spec_m - y_spec_m
+
+         if ins_root is not None:
+             if self.data["high_end_process"].startswith("mirroring"):
+                 input_high_end_ = spec_utils.mirroring(
+                     self.data["high_end_process"], y_spec_m, input_high_end, self.mp
+                 )
+                 wav_instrument = spec_utils.cmb_spectrogram_to_wave(
+                     y_spec_m, self.mp, input_high_end_h, input_high_end_
+                 )
+             else:
+                 wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp)
+             print("instruments done")
+             if format in ["wav", "flac"]:
+                 sf.write(
+                     os.path.join(
+                         ins_root,
+                         "instrument.{}".format(format),
+                     ),
+                     (np.array(wav_instrument) * 32768).astype("int16"),
+                     self.mp.param["sr"],
+                 )
+             else:
+                 path = os.path.join(ins_root, "instrument.wav")
+                 sf.write(
+                     path,
+                     (np.array(wav_instrument) * 32768).astype("int16"),
+                     self.mp.param["sr"],
+                 )
+                 if os.path.exists(path):
+                     os.system(
+                         "ffmpeg -i %s -vn %s -q:a 2 -y"
+                         % (path, path[:-4] + ".%s" % format)
+                     )
+         if vocal_root is not None:
+             if self.data["high_end_process"].startswith("mirroring"):
+                 input_high_end_ = spec_utils.mirroring(
+                     self.data["high_end_process"], v_spec_m, input_high_end, self.mp
+                 )
+                 wav_vocals = spec_utils.cmb_spectrogram_to_wave(
+                     v_spec_m, self.mp, input_high_end_h, input_high_end_
+                 )
+             else:
+                 wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp)
+             print("vocals done")
+             if format in ["wav", "flac"]:
+                 sf.write(
+                     os.path.join(
+                         vocal_root,
+                         "vocal.{}".format(format),
+                     ),
+                     (np.array(wav_vocals) * 32768).astype("int16"),
+                     self.mp.param["sr"],
+                 )
+             else:
+                 path = os.path.join(vocal_root, "vocal.wav")
+                 sf.write(
+                     path,
+                     (np.array(wav_vocals) * 32768).astype("int16"),
+                     self.mp.param["sr"],
+                 )
+                 if os.path.exists(path):
+                     os.system(
+                         "ffmpeg -i %s -vn %s -q:a 2 -y"
+                         % (path, path[:-4] + ".%s" % format)
+                     )
+
+
+ class _audio_pre_new:
+     def __init__(self, agg, model_path, device, is_half):
+         print("_audio_pre_new")
+         self.model_path = model_path
+         self.device = device
+         self.data = {
+             # Processing Options
+             "postprocess": False,
+             "tta": False,
+             # Constants
+             "window_size": 512,
+             "agg": agg,
+             "high_end_process": "mirroring",
+         }
+         mp = ModelParameters("uvr5_pack/lib_v5/modelparams/4band_v3.json")
+         nout = 64 if "DeReverb" in model_path else 48
+         model = CascadedNet(mp.param["bins"] * 2, nout)
+         cpk = torch.load(model_path, map_location="cpu")
+         model.load_state_dict(cpk)
+         model.eval()
+         if is_half:
+             model = model.half().to(device)
+         else:
+             model = model.to(device)
+
+         self.mp = mp
+         self.model = model
+
+     def _path_audio_(
+         self, music_file, vocal_root=None, ins_root=None, format="flac"
+     ):  # for the three VR models, vocal and ins are swapped
+         if ins_root is None and vocal_root is None:
+             return "No save root."
+         name = os.path.basename(music_file)
+         if ins_root is not None:
+             os.makedirs(ins_root, exist_ok=True)
+         if vocal_root is not None:
+             os.makedirs(vocal_root, exist_ok=True)
+         X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {}
+         bands_n = len(self.mp.param["band"])
+         # print(bands_n)
+         for d in range(bands_n, 0, -1):
+             bp = self.mp.param["band"][d]
+             if d == bands_n:  # high-end band
+                 (
+                     X_wave[d],
+                     _,
+                 ) = librosa.core.load(  # in theory librosa may misread some audio; reading via ffmpeg would be safer, but it was dropped as too much hassle
+                     music_file,
+                     bp["sr"],
+                     False,
+                     dtype=np.float32,
+                     res_type=bp["res_type"],
+                 )
+                 if X_wave[d].ndim == 1:
+                     X_wave[d] = np.asfortranarray([X_wave[d], X_wave[d]])
+             else:  # lower bands
+                 X_wave[d] = librosa.core.resample(
+                     X_wave[d + 1],
+                     self.mp.param["band"][d + 1]["sr"],
+                     bp["sr"],
+                     res_type=bp["res_type"],
+                 )
+             # Stft of wave source
+             X_spec_s[d] = spec_utils.wave_to_spectrogram_mt(
+                 X_wave[d],
+                 bp["hl"],
+                 bp["n_fft"],
+                 self.mp.param["mid_side"],
+                 self.mp.param["mid_side_b2"],
+                 self.mp.param["reverse"],
+             )
+             # pdb.set_trace()
+             if d == bands_n and self.data["high_end_process"] != "none":
+                 input_high_end_h = (bp["n_fft"] // 2 - bp["crop_stop"]) + (
+                     self.mp.param["pre_filter_stop"] - self.mp.param["pre_filter_start"]
+                 )
+                 input_high_end = X_spec_s[d][
+                     :, bp["n_fft"] // 2 - input_high_end_h : bp["n_fft"] // 2, :
+                 ]
+
+         X_spec_m = spec_utils.combine_spectrograms(X_spec_s, self.mp)
+         aggressive_set = float(self.data["agg"] / 100)
+         aggressiveness = {
+             "value": aggressive_set,
+             "split_bin": self.mp.param["band"][1]["crop_stop"],
+         }
+         with torch.no_grad():
+             pred, X_mag, X_phase = inference(
+                 X_spec_m, self.device, self.model, aggressiveness, self.data
+             )
+         # Postprocess
+         if self.data["postprocess"]:
+             pred_inv = np.clip(X_mag - pred, 0, np.inf)
+             pred = spec_utils.mask_silence(pred, pred_inv)
+         y_spec_m = pred * X_phase
+         v_spec_m = X_spec_m - y_spec_m
+
+         if ins_root is not None:
+             if self.data["high_end_process"].startswith("mirroring"):
+                 input_high_end_ = spec_utils.mirroring(
+                     self.data["high_end_process"], y_spec_m, input_high_end, self.mp
+                 )
+                 wav_instrument = spec_utils.cmb_spectrogram_to_wave(
+                     y_spec_m, self.mp, input_high_end_h, input_high_end_
+                 )
+             else:
+                 wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp)
+             print("%s instruments done" % name)
+             if format in ["wav", "flac"]:
+                 sf.write(
+                     os.path.join(
+                         ins_root,
+                         "instrument_{}_{}.{}".format(name, self.data["agg"], format),
+                     ),
+                     (np.array(wav_instrument) * 32768).astype("int16"),
+                     self.mp.param["sr"],
+                 )
+             else:
+                 path = os.path.join(
+                     ins_root, "instrument_{}_{}.wav".format(name, self.data["agg"])
+                 )
+                 sf.write(
+                     path,
+                     (np.array(wav_instrument) * 32768).astype("int16"),
+                     self.mp.param["sr"],
+                 )
+                 if os.path.exists(path):
+                     os.system(
+                         "ffmpeg -i %s -vn %s -q:a 2 -y"
+                         % (path, path[:-4] + ".%s" % format)
+                     )
+         if vocal_root is not None:
+             if self.data["high_end_process"].startswith("mirroring"):
+                 input_high_end_ = spec_utils.mirroring(
+                     self.data["high_end_process"], v_spec_m, input_high_end, self.mp
+                 )
+                 wav_vocals = spec_utils.cmb_spectrogram_to_wave(
+                     v_spec_m, self.mp, input_high_end_h, input_high_end_
+                 )
+             else:
+                 wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp)
+             print("%s vocals done" % name)
+             if format in ["wav", "flac"]:
+                 sf.write(
+                     os.path.join(
+                         vocal_root,
+                         "vocal_{}_{}.{}".format(name, self.data["agg"], format),
+                     ),
+                     (np.array(wav_vocals) * 32768).astype("int16"),
+                     self.mp.param["sr"],
+                 )
+             else:
+                 path = os.path.join(
+                     vocal_root, "vocal_{}_{}.wav".format(name, self.data["agg"])
+                 )
+                 sf.write(
+                     path,
+                     (np.array(wav_vocals) * 32768).astype("int16"),
+                     self.mp.param["sr"],
+                 )
+                 if os.path.exists(path):
+                     os.system(
+                         "ffmpeg -i %s -vn %s -q:a 2 -y"
+                         % (path, path[:-4] + ".%s" % format)
+                     )
+
+
+ if __name__ == "__main__":
+     device = "cuda"
+     is_half = True
+     # model_path = "uvr5_weights/2_HP-UVR.pth"
+     # model_path = "uvr5_weights/VR-DeEchoDeReverb.pth"
+     # model_path = "uvr5_weights/VR-DeEchoNormal.pth"
+     model_path = "models/mymodelimran.pth"
+     # pre_fun = _audio_pre_(model_path=model_path, device=device, is_half=True, agg=10)
+     pre_fun = _audio_pre_new(model_path=model_path, device=device, is_half=is_half, agg=10)
+     audio_path = "audios/abc.mp3"
+     save_path = "results"
+     pre_fun._path_audio_(audio_path, save_path, save_path)
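
A minimal separation sketch using the classes above; the paths are assumptions, and the model file is the 2_HP-UVR.pth weight referenced in the commented-out lines of the __main__ block. For the DeEcho/DeReverb models, _audio_pre_new must be used instead, and its vocal_root/ins_root arguments are swapped relative to _audio_pre_:

pre_fun = _audio_pre_(agg=10, model_path="uvr5_weights/2_HP-UVR.pth", device="cuda", is_half=True)
pre_fun._path_audio_("audios/song.wav", ins_root="results/instrument", vocal_root="results/vocal", format="wav")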
local.settings.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "IsEncrypted": false,
+   "Values": {
+     "AzureWebJobsStorage": "",
+     "FUNCTIONS_WORKER_RUNTIME": "python",
+     "AzureWebJobs.AltVoiceClone.Disabled": "true"
+   }
+ }
main.py ADDED
@@ -0,0 +1,41 @@
+ import functions_framework
+ import os
+ import logging
+
+
+ @functions_framework.http
+ def hello_http(request):
+     request_args = request.args
+
+     # Extract parameters from request
+     audio_file = request_args.get('audio_file')
+     model_name = request_args.get('model_name')
+     transform = request_args.get('transform')
+     song = request_args.get('song')
+
+     # Check if any parameter is None
+     if any(param is None for param in [audio_file, model_name, transform, song]):
+         return "Please provide all the required arguments: audio_file, model_name, transform, song."
+     else:
+         is_song = song.lower() == "true"
+         transform = int(transform)
+         import RVC_class
+
+         # Create an instance of VoiceConverter
+         converter = RVC_class.VoiceConverter()
+
+         # Call single_run method and get the result
+         result = converter.single_run(audio_file, model_name, transform, is_song)
+
+         return result
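
A minimal sketch of calling the deployed function over HTTP; the host URL and parameter values are assumptions, and only the query-string keys come from the handler above:

import requests

resp = requests.get(
    "https://<function-host>/hello_http",  # hypothetical endpoint
    params={
        "audio_file": "audios/abc.wav",
        "model_name": "mymodel",
        "transform": 0,
        "song": "true",
    },
)
print(resp.text)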
my_utils.py ADDED
@@ -0,0 +1,35 @@
+ import ffmpeg
+ import numpy as np
+ import requests
+ import logging
+ import wave
+ import librosa
+
+
+ def load_audio(file_path, sr):
+     # Reads a PCM WAV file with the standard-library wave module.
+     # Assumptions baked in: 16-bit samples (np.int16) and mono input
+     # (multi-channel frames would stay interleaved).
+     try:
+         with wave.open(file_path, 'rb') as audio_file:
+             channels = audio_file.getnchannels()
+             sample_width = audio_file.getsampwidth()
+             frame_rate = audio_file.getframerate()
+             frames = audio_file.readframes(audio_file.getnframes())
+
+             audio_data = np.frombuffer(frames, dtype=np.int16)
+             # Convert to float and normalize to the range [-1, 1]
+             audio_data = audio_data.astype(np.float32) / np.iinfo(np.int16).max
+
+             # Resample the audio if the sample rate is different
+             if frame_rate != sr:
+                 audio_data = librosa.resample(audio_data, orig_sr=frame_rate, target_sr=sr)
+
+             # Perform any required audio processing or conversion
+             # ...
+
+     except Exception as e:
+         raise RuntimeError(f"Failed to load audio: {e}")
+
+     return audio_data
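
A minimal usage sketch (the file path is an assumption): load_audio returns a 1-D float32 array normalized to [-1, 1], resampled to the 16 kHz rate the inference pipeline expects.

from my_utils import load_audio

audio = load_audio("audios/sample.wav", 16000)
print(audio.shape, audio.dtype)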
onnx_inference_demo.py ADDED
@@ -0,0 +1,20 @@
+ import soundfile
+ from infer_pack.onnx_inference import OnnxRVC
+
+ hop_size = 512
+ sampling_rate = 40000  # sampling rate
+ f0_up_key = 0  # pitch shift in semitones
+ sid = 0  # speaker ID
+ f0_method = "dio"  # F0 extraction algorithm
+ model_path = "ShirohaRVC.onnx"  # full path to the model
+ vec_name = "vec-256-layer-9"  # expanded internally to f"pretrained/{vec_name}.onnx"; an ONNX vec model is required
+ wav_path = "123.wav"  # input path or BytesIO instance
+ out_path = "out.wav"  # output path or BytesIO instance
+
+ model = OnnxRVC(
+     model_path, vec_path=vec_name, sr=sampling_rate, hop_size=hop_size, device="cuda"
+ )
+
+ audio = model.inference(wav_path, sid, f0_method=f0_method, f0_up_key=f0_up_key)
+
+ soundfile.write(out_path, audio, sampling_rate)
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,61 @@
+ [tool.poetry]
+ name = "rvc-beta"
+ version = "0.1.0"
+ description = ""
+ authors = ["lj1995"]
+ license = "MIT"
+
+ [tool.poetry.dependencies]
+ python = "^3.8"
+ torch = "^2.0.0"
+ torchaudio = "^2.0.1"
+ Cython = "^0.29.34"
+ gradio = "^3.24.1"
+ future = "^0.18.3"
+ pydub = "^0.25.1"
+ soundfile = "^0.12.1"
+ ffmpeg-python = "^0.2.0"
+ tensorboardX = "^2.6"
+ functorch = "^2.0.0"
+ fairseq = "^0.12.2"
+ faiss-cpu = "^1.7.2"
+ Jinja2 = "^3.1.2"
+ json5 = "^0.9.11"
+ librosa = "0.9.2"
+ llvmlite = "0.39.0"
+ Markdown = "^3.4.3"
+ matplotlib = "^3.7.1"
+ matplotlib-inline = "^0.1.6"
+ numba = "0.56.4"
+ numpy = "1.23.5"
+ scipy = "1.9.3"
+ praat-parselmouth = "^0.4.3"
+ Pillow = "9.3.0"
+ pyworld = "^0.3.2"
+ resampy = "^0.4.2"
+ scikit-learn = "^1.2.2"
+ starlette = "^0.27.0"
+ tensorboard = "^2.12.1"
+ tensorboard-data-server = "^0.7.0"
+ tensorboard-plugin-wit = "^1.8.1"
+ torchgen = "^0.0.1"
+ tqdm = "^4.65.0"
+ tornado = "^6.3"
+ Werkzeug = "^2.2.3"
+ uc-micro-py = "^1.0.1"
+ sympy = "^1.11.1"
+ tabulate = "^0.9.0"
+ PyYAML = "^6.0"
+ pyasn1 = "^0.4.8"
+ pyasn1-modules = "^0.2.8"
+ fsspec = "^2023.3.0"
+ absl-py = "^1.4.0"
+ audioread = "^3.0.0"
+ uvicorn = "^0.21.1"
+ colorama = "^0.4.6"
+
+ [tool.poetry.dev-dependencies]
+
+ [build-system]
+ requires = ["poetry-core>=1.0.0"]
+ build-backend = "poetry.core.masonry.api"
requirements-win-for-realtime_vc_gui.txt ADDED
@@ -0,0 +1,28 @@
+ # 1. Install torch from pytorch.org:
+ # torch 2.0 with cuda 11.8
+ # pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
+ # torch 1.11.0 with cuda 11.3
+ # pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu113
+ einops
+ fairseq
+ flask
+ flask_cors
+ gin
+ gin_config
+ librosa
+ local_attention
+ matplotlib
+ praat-parselmouth
+ pyworld
+ PyYAML
+ resampy
+ scikit_learn
+ scipy
+ SoundFile
+ tensorboard
+ tqdm
+ wave
+ PySimpleGUI
+ sounddevice
+ gradio
+ noisereduce
requirements.txt ADDED
@@ -0,0 +1,55 @@
+ # DO NOT include azure-functions-worker in this file
+ # The Python Worker is managed by Azure Functions platform
+ # Manually managing azure-functions-worker may cause unexpected issues
+
+ functions-framework
+ joblib>=1.1.0
+ numba==0.56.4
+ numpy==1.23.5
+ scipy==1.9.3
+ librosa==0.9.1
+ llvmlite==0.39.0
+ fairseq==0.12.2
+ faiss-cpu==1.7.3
+ gradio==3.14.0
+ Cython
+ pydub>=0.25.1
+ soundfile>=0.12.1
+ ffmpeg-python>=0.2.0
+ tensorboardX
+ Jinja2>=3.1.2
+ json5
+ Markdown
+ matplotlib>=3.7.0
+ matplotlib-inline>=0.1.3
+ praat-parselmouth>=0.4.2
+ Pillow>=9.1.1
+ resampy>=0.4.2
+ scikit-learn
+ starlette>=0.25.0
+ tensorboard
+ tensorboard-data-server
+ tensorboard-plugin-wit
+ torchgen>=0.0.1
+ torch==2.0.0
+ tqdm>=4.63.1
+ tornado>=6.1
+ Werkzeug>=2.2.3
+ uc-micro-py>=1.0.1
+ sympy>=1.11.1
+ tabulate>=0.8.10
+ PyYAML>=6.0
+ pyasn1>=0.4.8
+ pyasn1-modules>=0.2.8
+ fsspec>=2022.11.0
+ absl-py>=1.2.0
+ audioread
+ uvicorn>=0.21.1
+ colorama>=0.4.5
+ pyworld>=0.3.2
+ httpx==0.23.0
+ onnxruntime-gpu
+ torchcrepe==0.0.20
+ wave
+ requests
slicer2.py ADDED
@@ -0,0 +1,260 @@
+ import numpy as np
+
+
+ # This function is obtained from librosa.
+ def get_rms(
+     y,
+     frame_length=2048,
+     hop_length=512,
+     pad_mode="constant",
+ ):
+     padding = (int(frame_length // 2), int(frame_length // 2))
+     y = np.pad(y, padding, mode=pad_mode)
+
+     axis = -1
+     # put our new within-frame axis at the end for now
+     out_strides = y.strides + tuple([y.strides[axis]])
+     # Reduce the shape on the framing axis
+     x_shape_trimmed = list(y.shape)
+     x_shape_trimmed[axis] -= frame_length - 1
+     out_shape = tuple(x_shape_trimmed) + tuple([frame_length])
+     xw = np.lib.stride_tricks.as_strided(y, shape=out_shape, strides=out_strides)
+     if axis < 0:
+         target_axis = axis - 1
+     else:
+         target_axis = axis + 1
+     xw = np.moveaxis(xw, -1, target_axis)
+     # Downsample along the target axis
+     slices = [slice(None)] * xw.ndim
+     slices[axis] = slice(0, None, hop_length)
+     x = xw[tuple(slices)]
+
+     # Calculate power
+     power = np.mean(np.abs(x) ** 2, axis=-2, keepdims=True)
+
+     return np.sqrt(power)
+
+
+ class Slicer:
+     def __init__(
+         self,
+         sr: int,
+         threshold: float = -40.0,
+         min_length: int = 5000,
+         min_interval: int = 300,
+         hop_size: int = 20,
+         max_sil_kept: int = 5000,
+     ):
+         if not min_length >= min_interval >= hop_size:
+             raise ValueError(
+                 "The following condition must be satisfied: min_length >= min_interval >= hop_size"
+             )
+         if not max_sil_kept >= hop_size:
+             raise ValueError(
+                 "The following condition must be satisfied: max_sil_kept >= hop_size"
+             )
+         min_interval = sr * min_interval / 1000
+         self.threshold = 10 ** (threshold / 20.0)
+         self.hop_size = round(sr * hop_size / 1000)
+         self.win_size = min(round(min_interval), 4 * self.hop_size)
+         self.min_length = round(sr * min_length / 1000 / self.hop_size)
+         self.min_interval = round(min_interval / self.hop_size)
+         self.max_sil_kept = round(sr * max_sil_kept / 1000 / self.hop_size)
+
+     def _apply_slice(self, waveform, begin, end):
+         if len(waveform.shape) > 1:
+             return waveform[
+                 :, begin * self.hop_size : min(waveform.shape[1], end * self.hop_size)
+             ]
+         else:
+             return waveform[
+                 begin * self.hop_size : min(waveform.shape[0], end * self.hop_size)
+             ]
+
+     # @timeit
+     def slice(self, waveform):
+         if len(waveform.shape) > 1:
+             samples = waveform.mean(axis=0)
+         else:
+             samples = waveform
+         if samples.shape[0] <= self.min_length:
+             return [waveform]
+         rms_list = get_rms(
+             y=samples, frame_length=self.win_size, hop_length=self.hop_size
+         ).squeeze(0)
+         sil_tags = []
+         silence_start = None
+         clip_start = 0
+         for i, rms in enumerate(rms_list):
+             # Keep looping while frame is silent.
+             if rms < self.threshold:
+                 # Record start of silent frames.
+                 if silence_start is None:
+                     silence_start = i
+                 continue
+             # Keep looping while frame is not silent and silence start has not been recorded.
+             if silence_start is None:
+                 continue
+             # Clear recorded silence start if interval is not enough or clip is too short
+             is_leading_silence = silence_start == 0 and i > self.max_sil_kept
+             need_slice_middle = (
+                 i - silence_start >= self.min_interval
+                 and i - clip_start >= self.min_length
+             )
+             if not is_leading_silence and not need_slice_middle:
+                 silence_start = None
+                 continue
+             # Need slicing. Record the range of silent frames to be removed.
+             if i - silence_start <= self.max_sil_kept:
+                 pos = rms_list[silence_start : i + 1].argmin() + silence_start
+                 if silence_start == 0:
+                     sil_tags.append((0, pos))
+                 else:
+                     sil_tags.append((pos, pos))
+                 clip_start = pos
+             elif i - silence_start <= self.max_sil_kept * 2:
+                 pos = rms_list[
+                     i - self.max_sil_kept : silence_start + self.max_sil_kept + 1
+                 ].argmin()
+                 pos += i - self.max_sil_kept
+                 pos_l = (
+                     rms_list[
+                         silence_start : silence_start + self.max_sil_kept + 1
+                     ].argmin()
+                     + silence_start
+                 )
+                 pos_r = (
+                     rms_list[i - self.max_sil_kept : i + 1].argmin()
+                     + i
+                     - self.max_sil_kept
+                 )
+                 if silence_start == 0:
+                     sil_tags.append((0, pos_r))
+                     clip_start = pos_r
+                 else:
+                     sil_tags.append((min(pos_l, pos), max(pos_r, pos)))
+                     clip_start = max(pos_r, pos)
+             else:
+                 pos_l = (
+                     rms_list[
+                         silence_start : silence_start + self.max_sil_kept + 1
+                     ].argmin()
+                     + silence_start
+                 )
+                 pos_r = (
+                     rms_list[i - self.max_sil_kept : i + 1].argmin()
+                     + i
+                     - self.max_sil_kept
+                 )
+                 if silence_start == 0:
+                     sil_tags.append((0, pos_r))
+                 else:
+                     sil_tags.append((pos_l, pos_r))
+                 clip_start = pos_r
+             silence_start = None
+         # Deal with trailing silence.
+         total_frames = rms_list.shape[0]
+         if (
+             silence_start is not None
+             and total_frames - silence_start >= self.min_interval
+         ):
+             silence_end = min(total_frames, silence_start + self.max_sil_kept)
+             pos = rms_list[silence_start : silence_end + 1].argmin() + silence_start
+             sil_tags.append((pos, total_frames + 1))
+         # Apply and return slices.
+         if len(sil_tags) == 0:
+             return [waveform]
+         else:
+             chunks = []
+             if sil_tags[0][0] > 0:
+                 chunks.append(self._apply_slice(waveform, 0, sil_tags[0][0]))
+             for i in range(len(sil_tags) - 1):
+                 chunks.append(
+                     self._apply_slice(waveform, sil_tags[i][1], sil_tags[i + 1][0])
+                 )
+             if sil_tags[-1][1] < total_frames:
+                 chunks.append(
+                     self._apply_slice(waveform, sil_tags[-1][1], total_frames)
+                 )
+             return chunks
+
+
+ def main():
+     import os.path
+     from argparse import ArgumentParser
+
+     import librosa
+     import soundfile
+
+     parser = ArgumentParser()
+     parser.add_argument("audio", type=str, help="The audio to be sliced")
+     parser.add_argument(
+         "--out", type=str, help="Output directory of the sliced audio clips"
+     )
+     parser.add_argument(
+         "--db_thresh",
+         type=float,
+         required=False,
+         default=-40,
+         help="The dB threshold for silence detection",
+     )
+     parser.add_argument(
+         "--min_length",
+         type=int,
+         required=False,
+         default=5000,
+         help="The minimum milliseconds required for each sliced audio clip",
+     )
+     parser.add_argument(
+         "--min_interval",
+         type=int,
+         required=False,
+         default=300,
+         help="The minimum milliseconds for a silence part to be sliced",
+     )
+     parser.add_argument(
+         "--hop_size",
+         type=int,
+         required=False,
+         default=10,
+         help="Frame length in milliseconds",
+     )
+     parser.add_argument(
+         "--max_sil_kept",
+         type=int,
+         required=False,
+         default=500,
+         help="The maximum silence length kept around the sliced clip, presented in milliseconds",
+     )
+     args = parser.parse_args()
+     out = args.out
+     if out is None:
+         out = os.path.dirname(os.path.abspath(args.audio))
+     audio, sr = librosa.load(args.audio, sr=None, mono=False)
+     slicer = Slicer(
+         sr=sr,
+         threshold=args.db_thresh,
+         min_length=args.min_length,
+         min_interval=args.min_interval,
+         hop_size=args.hop_size,
+         max_sil_kept=args.max_sil_kept,
+     )
+     chunks = slicer.slice(audio)
+     if not os.path.exists(out):
+         os.makedirs(out)
+     for i, chunk in enumerate(chunks):
+         if len(chunk.shape) > 1:
+             chunk = chunk.T
+         soundfile.write(
+             os.path.join(
+                 out,
+                 "%s_%d.wav"
+                 % (os.path.basename(args.audio).rsplit(".", maxsplit=1)[0], i),
+             ),
+             chunk,
+             sr,
+         )
+
+
+ if __name__ == "__main__":
+     main()
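
A minimal programmatic sketch mirroring what main() does; the input file name is an assumption:

import librosa
import soundfile

audio, sr = librosa.load("vocals.wav", sr=None, mono=False)
slicer = Slicer(sr=sr, threshold=-40.0, min_length=5000, min_interval=300,
                hop_size=10, max_sil_kept=500)
for i, chunk in enumerate(slicer.slice(audio)):
    if len(chunk.shape) > 1:
        chunk = chunk.T  # soundfile expects (frames, channels)
    soundfile.write("chunk_%d.wav" % i, chunk, sr)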
temp.py ADDED
@@ -0,0 +1,1392 @@
1
+ import os
2
+ import shutil
3
+ import sys
4
+ import tempfile
5
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
6
+ now_dir = os.getcwd()
7
+ sys.path.append(now_dir)
8
+ import traceback, pdb
9
+ import warnings
10
+
11
+ import numpy as np
12
+ import torch
13
+
14
+ os.environ['OPENBLAS_NUM_THREADS'] = '1'
15
+ os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"
16
+ import logging
17
+ import threading
18
+ from random import shuffle
19
+ from subprocess import Popen
20
+ from time import sleep
21
+
22
+ import faiss
23
+ import ffmpeg
24
+ import gradio as gr
25
+ import soundfile as sf
26
+ from config import Config
27
+ from fairseq import checkpoint_utils
28
+ from i18n import I18nAuto
29
+ from infer_pack.models import (
30
+ SynthesizerTrnMs256NSFsid,
31
+ SynthesizerTrnMs256NSFsid_nono,
32
+ SynthesizerTrnMs768NSFsid,
33
+ SynthesizerTrnMs768NSFsid_nono,
34
+ )
35
+ from infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
36
+ from infer_uvr5 import _audio_pre_, _audio_pre_new
37
+ from MDXNet import MDXNetDereverb
38
+ from my_utils import load_audio
39
+ from train.process_ckpt import change_info, extract_small_model, merge, show_info
40
+ from vc_infer_pipeline import VC
41
+ from sklearn.cluster import MiniBatchKMeans
42
+
43
+ logging.getLogger("numba").setLevel(logging.WARNING)
44
+
45
+
46
+ tmp = os.path.join(now_dir, "TEMP")
47
+ shutil.rmtree(tmp, ignore_errors=True)
48
+ shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
49
+ shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True)
50
+ os.makedirs(tmp, exist_ok=True)
51
+ os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
52
+ os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True)
53
+ os.environ["TEMP"] = tmp
54
+ warnings.filterwarnings("ignore")
55
+ torch.manual_seed(114514)
56
+ from scipy.io import wavfile
57
+
58
+ config = Config()
59
+ i18n = I18nAuto()
60
+ i18n.print()
61
+ # 判断是否有能用来训练和加速推理的N卡
62
+ ngpu = torch.cuda.device_count()
63
+ gpu_infos = []
64
+ mem = []
65
+ if_gpu_ok = False
66
+
67
+ if torch.cuda.is_available() or ngpu != 0:
68
+ for i in range(ngpu):
69
+ gpu_name = torch.cuda.get_device_name(i)
70
+ if any(
71
+ value in gpu_name.upper()
72
+ for value in [
73
+ "10",
74
+ "16",
75
+ "20",
76
+ "30",
77
+ "40",
78
+ "A2",
79
+ "A3",
80
+ "A4",
81
+ "P4",
82
+ "A50",
83
+ "500",
84
+ "A60",
85
+ "70",
86
+ "80",
87
+ "90",
88
+ "M4",
89
+ "T4",
90
+ "TITAN",
91
+ ]
92
+ ):
93
+ # A10#A100#V100#A40#P40#M40#K80#A4500
94
+ if_gpu_ok = True # 至少有一张能用的N卡
95
+ gpu_infos.append("%s\t%s" % (i, gpu_name))
96
+ mem.append(
97
+ int(
98
+ torch.cuda.get_device_properties(i).total_memory
99
+ / 1024
100
+ / 1024
101
+ / 1024
102
+ + 0.4
103
+ )
104
+ )
105
+ if if_gpu_ok and len(gpu_infos) > 0:
106
+ gpu_info = "\n".join(gpu_infos)
107
+ default_batch_size = 1
108
+ else:
109
+ gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练")
110
+ default_batch_size = 1
111
+ gpus = "-".join([i[0] for i in gpu_infos])
112
+
113
+
114
+ class ToolButton(gr.Button, gr.components.FormComponent):
115
+ """Small button with single emoji as text, fits inside gradio forms"""
116
+
117
+ def __init__(self, **kwargs):
118
+ super().__init__(variant="tool", **kwargs)
119
+
120
+ def get_block_name(self):
121
+ return "button"
122
+
123
+
124
+ hubert_model = None
125
+
126
+
127
+ def load_hubert():
128
+ global hubert_model
129
+ models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
130
+ ["hubert_base.pt"],
131
+ suffix="",
132
+ )
133
+ hubert_model = models[0]
134
+ hubert_model = hubert_model.to(config.device)
135
+ if config.is_half:
136
+ hubert_model = hubert_model.half()
137
+ else:
138
+ hubert_model = hubert_model.float()
139
+ hubert_model.eval()
140
+
141
+
142
+ weight_root = "weights"
143
+ weight_uvr5_root = "uvr5_weights"
144
+ index_root = "logs"
145
+ names = []
146
+ for name in os.listdir(weight_root):
147
+ if name.endswith(".pth"):
148
+ names.append(name)
149
+ index_paths = []
150
+ for root, dirs, files in os.walk(index_root, topdown=False):
151
+ for name in files:
152
+ if name.endswith(".index") and "trained" not in name:
153
+ index_paths.append("%s/%s" % (root, name))
154
+ uvr5_names = []
155
+ for name in os.listdir(weight_uvr5_root):
156
+ if name.endswith(".pth") or "onnx" in name:
157
+ uvr5_names.append(name.replace(".pth", ""))
158
+
159
+
160
+ def vc_single(
161
+ sid,
162
+ input_audio_path,
163
+ f0_up_key,
164
+ f0_file,
165
+ f0_method,
166
+ file_index,
167
+ file_index2,
168
+ # file_big_npy,
169
+ index_rate,
170
+ filter_radius,
171
+ resample_sr,
172
+ rms_mix_rate,
173
+ protect,
174
+ ): # spk_item, input_audio0, vc_transform0,f0_file,f0method0
175
+ global tgt_sr, net_g, vc, hubert_model, version
176
+ print(f0_up_key)
177
+
178
+ if input_audio_path is None:
179
+ return "You need to upload an audio", None
180
+ print("input_audio_path: ", input_audio_path)
181
+ print("f0_up_key: ", f0_up_key)
182
+ f0_up_key = int(f0_up_key)
183
+ try:
184
+ audio = load_audio(input_audio_path, 16000)
185
+ audio_max = np.abs(audio).max() / 0.95
186
+ if audio_max > 1:
187
+ audio /= audio_max
188
+ times = [0, 0, 0]
189
+ if not hubert_model:
190
+ load_hubert()
191
+ if_f0 = cpt.get("f0", 1)
192
+ file_index = (
193
+ (
194
+ file_index.strip(" ")
195
+ .strip('"')
196
+ .strip("\n")
197
+ .strip('"')
198
+ .strip(" ")
199
+ .replace("trained", "added")
200
+ )
201
+ if file_index != ""
202
+ else file_index2
203
+ ) # 防止小白写错,自动帮他替换掉
204
+ # file_big_npy = (
205
+ # file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
206
+ # )
207
+ audio_opt = vc.pipeline(
208
+ hubert_model,
209
+ net_g,
210
+ sid,
211
+ audio,
212
+ input_audio_path,
213
+ times,
214
+ f0_up_key,
215
+ f0_method,
216
+ file_index,
217
+ # file_big_npy,
218
+ index_rate,
219
+ if_f0,
220
+ filter_radius,
221
+ tgt_sr,
222
+ resample_sr,
223
+ rms_mix_rate,
224
+ version,
225
+ protect,
226
+ f0_file=f0_file,
227
+ )
228
+ print(f0_up_key)
229
+
230
+ if tgt_sr != resample_sr >= 16000:
231
+ tgt_sr = resample_sr
232
+ index_info = (
233
+ "Using index:%s." % file_index
234
+ if os.path.exists(file_index)
235
+ else "Index not used."
236
+ )
237
+ return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
238
+ index_info,
239
+ times[0],
240
+ times[1],
241
+ times[2],
242
+ ), (tgt_sr, audio_opt)
243
+ except:
244
+ info = traceback.format_exc()
245
+ print(info)
246
+ return info, (None, None)
247
+
248
+
249
+ def vc_multi(
250
+ sid,
251
+ dir_path,
252
+ opt_root,
253
+ paths,
254
+ f0_up_key,
255
+ f0_method,
256
+ file_index,
257
+ file_index2,
258
+ # file_big_npy,
259
+ index_rate,
260
+ filter_radius,
261
+ resample_sr,
262
+ rms_mix_rate,
263
+ protect,
264
+ format1,
265
+ ):
266
+ try:
267
+ print(f0_up_key)
268
+
269
+ dir_path = (
270
+ dir_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
271
+ ) # 防止小白拷路径头尾带了空格和"和回车
272
+ opt_root = opt_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
273
+ os.makedirs(opt_root, exist_ok=True)
274
+ try:
275
+ if dir_path != "":
276
+ paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)]
277
+ else:
278
+ paths = [path.name for path in paths]
279
+ except:
280
+ traceback.print_exc()
281
+ paths = [path.name for path in paths]
282
+ infos = []
283
+ for path in paths:
284
+ info, opt = vc_single(
285
+ sid,
286
+ path,
287
+ f0_up_key,
288
+ None,
289
+ f0_method,
290
+ file_index,
291
+ file_index2,
292
+ # file_big_npy,
293
+ index_rate,
294
+ filter_radius,
295
+ resample_sr,
296
+ rms_mix_rate,
297
+ protect,
298
+ )
299
+ if "Success" in info:
300
+ try:
301
+ tgt_sr, audio_opt = opt
302
+ if format1 in ["wav", "flac"]:
303
+ sf.write(
304
+ "%s/%s.%s" % (opt_root, os.path.basename(path), format1),
305
+ audio_opt,
306
+ tgt_sr,
307
+ )
308
+ else:
309
+ path = "%s/%s.wav" % (opt_root, os.path.basename(path))
310
+ sf.write(
311
+ path,
312
+ audio_opt,
313
+ tgt_sr,
314
+ )
315
+ if os.path.exists(path):
316
+ os.system(
317
+ "ffmpeg -i %s -vn %s -q:a 2 -y"
318
+ % (path, path[:-4] + ".%s" % format1)
319
+ )
320
+ except:
321
+ info += traceback.format_exc()
322
+ infos.append("%s->%s" % (os.path.basename(path), info))
323
+ yield "\n".join(infos)
324
+ yield "\n".join(infos)
325
+ except:
326
+ yield traceback.format_exc()
327
+
328
+
329
+ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0):
330
+ infos = []
331
+ try:
332
+ inp_root = inp_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
333
+ save_root_vocal = (
334
+ save_root_vocal.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
335
+ )
336
+ save_root_ins = (
337
+ save_root_ins.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
338
+ )
339
+ if model_name == "onnx_dereverb_By_FoxJoy":
340
+ pre_fun = MDXNetDereverb(15)
341
+ else:
342
+ func = _audio_pre_ if "DeEcho" not in model_name else _audio_pre_new
343
+ pre_fun = func(
344
+ agg=int(agg),
345
+ model_path=os.path.join(weight_uvr5_root, model_name + ".pth"),
346
+ device=config.device,
347
+ is_half=config.is_half,
348
+ )
349
+ if inp_root != "":
350
+ paths = [os.path.join(inp_root, name) for name in os.listdir(inp_root)]
351
+ else:
352
+ paths = [path.name for path in paths]
353
+ for path in paths:
354
+ inp_path = os.path.join(inp_root, path)
355
+ need_reformat = 1
356
+ done = 0
357
+ try:
358
+ info = ffmpeg.probe(inp_path, cmd="ffprobe")
359
+ if (
360
+ info["streams"][0]["channels"] == 2
361
+ and info["streams"][0]["sample_rate"] == "44100"
362
+ ):
363
+ need_reformat = 0
364
+ pre_fun._path_audio_(
365
+ inp_path, save_root_ins, save_root_vocal, format0
366
+ )
367
+ done = 1
368
+ except:
369
+ need_reformat = 1
370
+ traceback.print_exc()
371
+ if need_reformat == 1:
372
+ tmp_path = "%s/%s.reformatted.wav" % (tmp, os.path.basename(inp_path))
373
+ os.system(
374
+ "ffmpeg -i %s -vn -acodec pcm_s16le -ac 2 -ar 44100 %s -y"
375
+ % (inp_path, tmp_path)
376
+ )
377
+ inp_path = tmp_path
378
+ try:
379
+ if done == 0:
380
+ pre_fun._path_audio_(
381
+ inp_path, save_root_ins, save_root_vocal, format0
382
+ )
383
+ infos.append("%s->Success" % (os.path.basename(inp_path)))
384
+ yield "\n".join(infos)
385
+ except:
386
+ infos.append(
387
+ "%s->%s" % (os.path.basename(inp_path), traceback.format_exc())
388
+ )
389
+ yield "\n".join(infos)
390
+ except:
391
+ infos.append(traceback.format_exc())
392
+ yield "\n".join(infos)
393
+ finally:
394
+ try:
395
+ if model_name == "onnx_dereverb_By_FoxJoy":
396
+ del pre_fun.pred.model
397
+ del pre_fun.pred.model_
398
+ else:
399
+ del pre_fun.model
400
+ del pre_fun
401
+ except:
402
+ traceback.print_exc()
403
+ print("clean_empty_cache")
404
+ if torch.cuda.is_available():
405
+ torch.cuda.empty_cache()
406
+ yield "\n".join(infos)
407
+
408
+
409
+ # 一个选项卡全局只能有一个音色
410
+ def get_vc(sid, to_return_protect0, to_return_protect1):
411
+ global n_spk, tgt_sr, net_g, vc, cpt, version
412
+ if sid == "" or sid == []:
413
+ global hubert_model
414
+ if hubert_model is not None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的
415
+ print("clean_empty_cache")
416
+ del net_g, n_spk, vc, hubert_model, tgt_sr # ,cpt
417
+ hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None
418
+ if torch.cuda.is_available():
419
+ torch.cuda.empty_cache()
420
+ ###楼下不这么折腾清理不干净
421
+ if_f0 = cpt.get("f0", 1)
422
+ version = cpt.get("version", "v1")
423
+ if version == "v1":
424
+ if if_f0 == 1:
425
+ net_g = SynthesizerTrnMs256NSFsid(
426
+ *cpt["config"], is_half=config.is_half
427
+ )
428
+ else:
429
+ net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
430
+ elif version == "v2":
431
+ if if_f0 == 1:
432
+ net_g = SynthesizerTrnMs768NSFsid(
433
+ *cpt["config"], is_half=config.is_half
434
+ )
435
+ else:
436
+ net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
437
+ del net_g, cpt
438
+ if torch.cuda.is_available():
439
+ torch.cuda.empty_cache()
440
+ cpt = None
441
+ return {"visible": False, "__type__": "update"}
442
+ person = "%s/%s" % (weight_root, sid)
443
+ print("loading %s" % person)
444
+ cpt = torch.load(person, map_location="cpu")
445
+ tgt_sr = cpt["config"][-1]
446
+ cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
447
+ if_f0 = cpt.get("f0", 1)
448
+ if if_f0 == 0:
449
+ to_return_protect0 = to_return_protect1 = {
450
+ "visible": False,
451
+ "value": 0.5,
452
+ "__type__": "update",
453
+ }
454
+ else:
455
+ to_return_protect0 = {
456
+ "visible": True,
457
+ "value": to_return_protect0,
458
+ "__type__": "update",
459
+ }
460
+ to_return_protect1 = {
461
+ "visible": True,
462
+ "value": to_return_protect1,
463
+ "__type__": "update",
464
+ }
465
+ version = cpt.get("version", "v1")
466
+ if version == "v1":
467
+ if if_f0 == 1:
468
+ net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
469
+ else:
470
+ net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
471
+ elif version == "v2":
472
+ if if_f0 == 1:
473
+ net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
474
+ else:
475
+ net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
476
+ del net_g.enc_q
477
+ print(net_g.load_state_dict(cpt["weight"], strict=False))
478
+ net_g.eval().to(config.device)
479
+ if config.is_half:
480
+ net_g = net_g.half()
481
+ else:
482
+ net_g = net_g.float()
483
+ vc = VC(tgt_sr, config)
484
+ n_spk = cpt["config"][-3]
485
+ return (
486
+ {"visible": True, "maximum": n_spk, "__type__": "update"},
487
+ to_return_protect0,
488
+ to_return_protect1,
489
+ )
490
+
491
+
492
+ def change_choices():
493
+ names = []
494
+ for name in os.listdir(weight_root):
495
+ if name.endswith(".pth"):
496
+ names.append(name)
497
+ index_paths = []
498
+ for root, dirs, files in os.walk(index_root, topdown=False):
499
+ for name in files:
500
+ if name.endswith(".index") and "trained" not in name:
501
+ index_paths.append("%s/%s" % (root, name))
502
+ return {"choices": sorted(names), "__type__": "update"}, {
503
+ "choices": sorted(index_paths),
504
+ "__type__": "update",
505
+ }
506
+
507
+
508
+ def clean():
509
+ return {"value": "", "__type__": "update"}
510
+
511
+
512
+ sr_dict = {
513
+ "32k": 32000,
514
+ "40k": 40000,
515
+ "48k": 48000,
516
+ }
517
+
518
+
519
+ def if_done(done, p):
520
+ while 1:
521
+ if p.poll() is None:
522
+ sleep(0.5)
523
+ else:
524
+ break
525
+ done[0] = True
526
+
527
+
528
+ def if_done_multi(done, ps):
529
+ while 1:
530
+ # poll==None代表进程未结束
531
+ # 只要有一个进程未结束都不停
532
+ flag = 1
533
+ for p in ps:
534
+ if p.poll() is None:
535
+ flag = 0
536
+ sleep(0.5)
537
+ break
538
+ if flag == 1:
539
+ break
540
+ done[0] = True
541
+
542
+
543
+ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
544
+ sr = sr_dict[sr]
545
+ os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
546
+ f = open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "w")
547
+ f.close()
548
+ cmd = (
549
+ config.python_cmd
550
+ + " trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s "
551
+ % (trainset_dir, sr, n_p, now_dir, exp_dir)
552
+ + str(config.noparallel)
553
+ )
554
+ print(cmd)
555
+ p = Popen(cmd, shell=True) # , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir
556
+ ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
557
+ done = [False]
558
+ threading.Thread(
559
+ target=if_done,
560
+ args=(
561
+ done,
562
+ p,
563
+ ),
564
+ ).start()
565
+ while 1:
566
+ with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
567
+ yield (f.read())
568
+ sleep(1)
569
+ if done[0]:
570
+ break
571
+ with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
572
+ log = f.read()
573
+ print(log)
574
+ yield log
575
+
576
+
577
+ # but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2])
578
+ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19):
579
+ gpus = gpus.split("-")
580
+ os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
581
+ f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w")
582
+ f.close()
583
+ if if_f0:
584
+ cmd = config.python_cmd + " extract_f0_print.py %s/logs/%s %s %s" % (
585
+ now_dir,
586
+ exp_dir,
587
+ n_p,
588
+ f0method,
589
+ )
590
+ print(cmd)
591
+ p = Popen(cmd, shell=True, cwd=now_dir) # , stdin=PIPE, stdout=PIPE,stderr=PIPE
592
+ ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
593
+ done = [False]
594
+ threading.Thread(
595
+ target=if_done,
596
+ args=(
597
+ done,
598
+ p,
599
+ ),
600
+ ).start()
601
+ while 1:
602
+ with open(
603
+ "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r"
604
+ ) as f:
605
+ yield (f.read())
606
+ sleep(1)
607
+ if done[0]:
608
+ break
609
+ with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
610
+ log = f.read()
611
+ print(log)
612
+ yield log
613
+ ####对不同part分别开多进程
614
+ """
615
+ n_part=int(sys.argv[1])
616
+ i_part=int(sys.argv[2])
617
+ i_gpu=sys.argv[3]
618
+ exp_dir=sys.argv[4]
619
+ os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
620
+ """
621
+ leng = len(gpus)
622
+ ps = []
623
+ for idx, n_g in enumerate(gpus):
624
+ cmd = (
625
+ config.python_cmd
626
+ + " extract_feature_print.py %s %s %s %s %s/logs/%s %s"
627
+ % (
628
+ config.device,
629
+ leng,
630
+ idx,
631
+ n_g,
632
+ now_dir,
633
+ exp_dir,
634
+ version19,
635
+ )
636
+ )
637
+ print(cmd)
638
+ p = Popen(
639
+ cmd, shell=True, cwd=now_dir
640
+ ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
641
+ ps.append(p)
642
+ ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
643
+ done = [False]
644
+ threading.Thread(
645
+ target=if_done_multi,
646
+ args=(
647
+ done,
648
+ ps,
649
+ ),
650
+ ).start()
651
+ while 1:
652
+ with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
653
+ yield (f.read())
654
+ sleep(1)
655
+ if done[0]:
656
+ break
657
+ with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
658
+ log = f.read()
659
+ print(log)
660
+ yield log
661
+
662
+
663
+ def change_sr2(sr2, if_f0_3, version19):
664
+ path_str = "" if version19 == "v1" else "_v2"
665
+ f0_str = "f0" if if_f0_3 else ""
666
+ if_pretrained_generator_exist = os.access(
667
+ "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK
668
+ )
669
+ if_pretrained_discriminator_exist = os.access(
670
+ "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK
671
+ )
672
+ if not if_pretrained_generator_exist:
673
+ print(
674
+ "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2),
675
+ "not exist, will not use pretrained model",
676
+ )
677
+ if not if_pretrained_discriminator_exist:
678
+ print(
679
+ "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2),
680
+ "not exist, will not use pretrained model",
681
+ )
682
+ return (
683
+ "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
684
+ if if_pretrained_generator_exist
685
+ else "",
686
+ "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
687
+ if if_pretrained_discriminator_exist
688
+ else "",
689
+ )
690
+
691
+
692
+ def change_version19(sr2, if_f0_3, version19):
693
+ path_str = "" if version19 == "v1" else "_v2"
694
+ if sr2 == "32k" and version19 == "v1":
695
+ sr2 = "40k"
696
+ to_return_sr2 = (
697
+ {"choices": ["40k", "48k"], "__type__": "update", "value": sr2}
698
+ if version19 == "v1"
699
+ else {"choices": ["40k", "48k", "32k"], "__type__": "update", "value": sr2}
700
+ )
701
+ f0_str = "f0" if if_f0_3 else ""
702
+ if_pretrained_generator_exist = os.access(
703
+ "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK
704
+ )
705
+ if_pretrained_discriminator_exist = os.access(
706
+ "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK
707
+ )
708
+ if not if_pretrained_generator_exist:
709
+ print(
710
+ "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2),
711
+ "not exist, will not use pretrained model",
712
+ )
713
+ if not if_pretrained_discriminator_exist:
714
+ print(
715
+ "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2),
716
+ "not exist, will not use pretrained model",
717
+ )
718
+ return (
719
+ "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
720
+ if if_pretrained_generator_exist
721
+ else "",
722
+ "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
723
+ if if_pretrained_discriminator_exist
724
+ else "",
725
+ to_return_sr2,
726
+ )
727
+
728
+
729
+ def change_f0(if_f0_3, sr2, version19): # f0method8,pretrained_G14,pretrained_D15
730
+ path_str = "" if version19 == "v1" else "_v2"
731
+ if_pretrained_generator_exist = os.access(
732
+ "pretrained%s/f0G%s.pth" % (path_str, sr2), os.F_OK
733
+ )
734
+ if_pretrained_discriminator_exist = os.access(
735
+ "pretrained%s/f0D%s.pth" % (path_str, sr2), os.F_OK
736
+ )
737
+ if not if_pretrained_generator_exist:
738
+ print(
739
+ "pretrained%s/f0G%s.pth" % (path_str, sr2),
740
+ "not exist, will not use pretrained model",
741
+ )
742
+ if not if_pretrained_discriminator_exist:
743
+ print(
744
+ "pretrained%s/f0D%s.pth" % (path_str, sr2),
745
+ "not exist, will not use pretrained model",
746
+ )
747
+ if if_f0_3:
748
+ return (
749
+ {"visible": True, "__type__": "update"},
750
+ "pretrained%s/f0G%s.pth" % (path_str, sr2)
751
+ if if_pretrained_generator_exist
752
+ else "",
753
+ "pretrained%s/f0D%s.pth" % (path_str, sr2)
754
+ if if_pretrained_discriminator_exist
755
+ else "",
756
+ )
757
+ return (
758
+ {"visible": False, "__type__": "update"},
759
+ ("pretrained%s/G%s.pth" % (path_str, sr2))
760
+ if if_pretrained_generator_exist
761
+ else "",
762
+ ("pretrained%s/D%s.pth" % (path_str, sr2))
763
+ if if_pretrained_discriminator_exist
764
+ else "",
765
+ )
766
+
767
+
768
+ # but3.click(click_train,[exp_dir1,sr2,if_f0_3,save_epoch10,total_epoch11,batch_size12,if_save_latest13,pretrained_G14,pretrained_D15,gpus16])
769
+ def click_train(
770
+ exp_dir1,
771
+ sr2,
772
+ if_f0_3,
773
+ spk_id5,
774
+ save_epoch10,
775
+ total_epoch11,
776
+ batch_size12,
777
+ if_save_latest13,
778
+ pretrained_G14,
779
+ pretrained_D15,
780
+ gpus16,
781
+ if_cache_gpu17,
782
+ if_save_every_weights18,
783
+ version19,
784
+ ):
785
+ # 生成filelist
786
+ exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
787
+ os.makedirs(exp_dir, exist_ok=True)
788
+ gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir)
789
+ feature_dir = (
790
+ "%s/3_feature256" % (exp_dir)
791
+ if version19 == "v1"
792
+ else "%s/3_feature768" % (exp_dir)
793
+ )
794
+ if if_f0_3:
795
+ f0_dir = "%s/2a_f0" % (exp_dir)
796
+ f0nsf_dir = "%s/2b-f0nsf" % (exp_dir)
797
+ names = (
798
+ set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
799
+ & set([name.split(".")[0] for name in os.listdir(feature_dir)])
800
+ & set([name.split(".")[0] for name in os.listdir(f0_dir)])
801
+ & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
802
+ )
803
+ else:
804
+ names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
805
+ [name.split(".")[0] for name in os.listdir(feature_dir)]
806
+ )
807
+ opt = []
808
+ for name in names:
809
+ if if_f0_3:
810
+ opt.append(
811
+ "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
812
+ % (
813
+ gt_wavs_dir.replace("\\", "\\\\"),
814
+ name,
815
+ feature_dir.replace("\\", "\\\\"),
816
+ name,
817
+ f0_dir.replace("\\", "\\\\"),
818
+ name,
819
+ f0nsf_dir.replace("\\", "\\\\"),
820
+ name,
821
+ spk_id5,
822
+ )
823
+ )
824
+ else:
825
+ opt.append(
826
+ "%s/%s.wav|%s/%s.npy|%s"
827
+ % (
828
+ gt_wavs_dir.replace("\\", "\\\\"),
829
+ name,
830
+ feature_dir.replace("\\", "\\\\"),
831
+ name,
832
+ spk_id5,
833
+ )
834
+ )
835
+ fea_dim = 256 if version19 == "v1" else 768
836
+ if if_f0_3:
837
+ for _ in range(2):
838
+ opt.append(
839
+ "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
840
+ % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
841
+ )
842
+ else:
843
+ for _ in range(2):
844
+ opt.append(
845
+ "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
846
+ % (now_dir, sr2, now_dir, fea_dim, spk_id5)
847
+ )
848
+ shuffle(opt)
849
+ with open("%s/filelist.txt" % exp_dir, "w") as f:
850
+ f.write("\n".join(opt))
851
+ print("write filelist done")
852
+ # 生成config#无需生成config
853
+ # cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0"
854
+ print("use gpus:", gpus16)
855
+ if pretrained_G14 == "":
856
+ print("no pretrained Generator")
857
+ if pretrained_D15 == "":
858
+ print("no pretrained Discriminator")
859
+ if gpus16:
860
+ cmd = (
861
+ config.python_cmd
862
+ + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
863
+ % (
864
+ exp_dir1,
865
+ sr2,
866
+ 1 if if_f0_3 else 0,
867
+ batch_size12,
868
+ gpus16,
869
+ total_epoch11,
870
+ save_epoch10,
871
+ "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
872
+ "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
873
+ 1 if if_save_latest13 == i18n("是") else 0,
874
+ 1 if if_cache_gpu17 == i18n("是") else 0,
875
+ 1 if if_save_every_weights18 == i18n("是") else 0,
876
+ version19,
877
+ )
878
+ )
879
+ else:
880
+ cmd = (
881
+ config.python_cmd
882
+ + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
883
+ % (
884
+ exp_dir1,
885
+ sr2,
886
+ 1 if if_f0_3 else 0,
887
+ batch_size12,
888
+ total_epoch11,
889
+ save_epoch10,
890
+ "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "\b",
891
+ "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "\b",
892
+ 1 if if_save_latest13 == i18n("是") else 0,
893
+ 1 if if_cache_gpu17 == i18n("是") else 0,
894
+ 1 if if_save_every_weights18 == i18n("是") else 0,
895
+ version19,
896
+ )
897
+ )
898
+ print(cmd)
899
+ p = Popen(cmd, shell=True, cwd=now_dir)
900
+ p.wait()
901
+ return "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"
902
+
903
+
904
+ # but4.click(train_index, [exp_dir1], info3)
905
+ def train_index(exp_dir1, version19):
906
+ exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
907
+ os.makedirs(exp_dir, exist_ok=True)
908
+ feature_dir = (
909
+ "%s/3_feature256" % (exp_dir)
910
+ if version19 == "v1"
911
+ else "%s/3_feature768" % (exp_dir)
912
+ )
913
+ if not os.path.exists(feature_dir):
914
+ return "请先进行特征提取!"
915
+ listdir_res = list(os.listdir(feature_dir))
916
+ if len(listdir_res) == 0:
917
+ return "请先进行特征提取!"
918
+ infos = []
919
+ npys = []
920
+ for name in sorted(listdir_res):
921
+ phone = np.load("%s/%s" % (feature_dir, name))
922
+ npys.append(phone)
923
+ big_npy = np.concatenate(npys, 0)
924
+ big_npy_idx = np.arange(big_npy.shape[0])
925
+ np.random.shuffle(big_npy_idx)
926
+ big_npy = big_npy[big_npy_idx]
927
+ if big_npy.shape[0] > 2e5:
928
+ # if(1):
929
+ infos.append("Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0])
930
+ yield "\n".join(infos)
931
+ try:
932
+ big_npy = (
933
+ MiniBatchKMeans(
934
+ n_clusters=10000,
935
+ verbose=True,
936
+ batch_size=256 * config.n_cpu,
937
+ compute_labels=False,
938
+ init="random",
939
+ )
940
+ .fit(big_npy)
941
+ .cluster_centers_
942
+ )
943
+ except:
944
+ info = traceback.format_exc()
945
+ print(info)
946
+ infos.append(info)
947
+ yield "\n".join(infos)
948
+
949
+ np.save("%s/total_fea.npy" % exp_dir, big_npy)
950
+ n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
951
+ infos.append("%s,%s" % (big_npy.shape, n_ivf))
952
+ yield "\n".join(infos)
953
+ index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
954
+ # index = faiss.index_factory(256if version19=="v1"else 768, "IVF%s,PQ128x4fs,RFlat"%n_ivf)
955
+ infos.append("training")
956
+ yield "\n".join(infos)
957
+ index_ivf = faiss.extract_index_ivf(index) #
958
+ index_ivf.nprobe = 1
959
+ index.train(big_npy)
960
+ faiss.write_index(
961
+ index,
962
+ "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
963
+ % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
964
+ )
965
+ # faiss.write_index(index, '%s/trained_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19))
966
+ infos.append("adding")
967
+ yield "\n".join(infos)
968
+ batch_size_add = 8192
969
+ for i in range(0, big_npy.shape[0], batch_size_add):
970
+ index.add(big_npy[i : i + batch_size_add])
971
+ faiss.write_index(
972
+ index,
973
+ "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
974
+ % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
975
+ )
976
+ infos.append(
977
+ "成功构建索引,added_IVF%s_Flat_nprobe_%s_%s_%s.index"
978
+ % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
979
+ )
980
+ # faiss.write_index(index, '%s/added_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19))
981
+ # infos.append("成功构建索引,added_IVF%s_Flat_FastScan_%s.index"%(n_ivf,version19))
982
+ yield "\n".join(infos)
983
+
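+ # Hedged sketch (not part of the original commit): at inference time the saved
+ # index can be loaded and queried with faiss, mirroring vc_infer_pipeline.py below:
+ # index = faiss.read_index("%s/added_IVF%s_Flat_nprobe_1_%s_%s.index" % (exp_dir, n_ivf, exp_dir1, version19))
+ # score, ix = index.search(query_feats.astype("float32"), k=8)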
984
+
985
+ # but5.click(train1key, [exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0method8, save_epoch10, total_epoch11, batch_size12, if_save_latest13, pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17], info3)
986
+ def train1key(
987
+ exp_dir1,
988
+ sr2,
989
+ if_f0_3,
990
+ trainset_dir4,
991
+ spk_id5,
992
+ np7,
993
+ f0method8,
994
+ save_epoch10,
995
+ total_epoch11,
996
+ batch_size12,
997
+ if_save_latest13,
998
+ pretrained_G14,
999
+ pretrained_D15,
1000
+ gpus16,
1001
+ if_cache_gpu17,
1002
+ if_save_every_weights18,
1003
+ version19,
1004
+ ):
1005
+ infos = []
1006
+
1007
+ def get_info_str(strr):
1008
+ infos.append(strr)
1009
+ return "\n".join(infos)
1010
+
1011
+ model_log_dir = "%s/logs/%s" % (now_dir, exp_dir1)
1012
+ preprocess_log_path = "%s/preprocess.log" % model_log_dir
1013
+ extract_f0_feature_log_path = "%s/extract_f0_feature.log" % model_log_dir
1014
+ gt_wavs_dir = "%s/0_gt_wavs" % model_log_dir
1015
+ feature_dir = (
1016
+ "%s/3_feature256" % model_log_dir
1017
+ if version19 == "v1"
1018
+ else "%s/3_feature768" % model_log_dir
1019
+ )
1020
+
1021
+ os.makedirs(model_log_dir, exist_ok=True)
1022
+ #########step1: process the dataset
1023
+ open(preprocess_log_path, "w").close()
1024
+ cmd = (
1025
+ config.python_cmd
1026
+ + " trainset_preprocess_pipeline_print.py %s %s %s %s "
1027
+ % (trainset_dir4, sr_dict[sr2], np7, model_log_dir)
1028
+ + str(config.noparallel)
1029
+ )
1030
+ yield get_info_str(i18n("step1:正在处理数据"))
1031
+ yield get_info_str(cmd)
1032
+ p = Popen(cmd, shell=True)
1033
+ p.wait()
1034
+ with open(preprocess_log_path, "r") as f:
1035
+ print(f.read())
1036
+ #########step2a: extract pitch
1037
+ open(extract_f0_feature_log_path, "w")
1038
+ if if_f0_3:
1039
+ yield get_info_str("step2a:正在提取音高")
1040
+ cmd = config.python_cmd + " extract_f0_print.py %s %s %s" % (
1041
+ model_log_dir,
1042
+ np7,
1043
+ f0method8,
1044
+ )
1045
+ yield get_info_str(cmd)
1046
+ p = Popen(cmd, shell=True, cwd=now_dir)
1047
+ p.wait()
1048
+ with open(extract_f0_feature_log_path, "r") as f:
1049
+ print(f.read())
1050
+ else:
1051
+ yield get_info_str(i18n("step2a:无需提取音高"))
1052
+ #######step2b: extract features
1053
+ yield get_info_str(i18n("step2b:正在提取特征"))
1054
+ gpus = gpus16.split("-")
1055
+ leng = len(gpus)
1056
+ ps = []
1057
+ for idx, n_g in enumerate(gpus):
1058
+ cmd = config.python_cmd + " extract_feature_print.py %s %s %s %s %s %s" % (
1059
+ config.device,
1060
+ leng,
1061
+ idx,
1062
+ n_g,
1063
+ model_log_dir,
1064
+ version19,
1065
+ )
1066
+ yield get_info_str(cmd)
1067
+ p = Popen(
1068
+ cmd, shell=True, cwd=now_dir
1069
+ ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
1070
+ ps.append(p)
1071
+ for p in ps:
1072
+ p.wait()
1073
+ with open(extract_f0_feature_log_path, "r") as f:
1074
+ print(f.read())
1075
+ #######step3a: train the model
1076
+ yield get_info_str(i18n("step3a:正在训练模型"))
1077
+ # generate filelist
1078
+ if if_f0_3:
1079
+ f0_dir = "%s/2a_f0" % model_log_dir
1080
+ f0nsf_dir = "%s/2b-f0nsf" % model_log_dir
1081
+ names = (
1082
+ set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
1083
+ & set([name.split(".")[0] for name in os.listdir(feature_dir)])
1084
+ & set([name.split(".")[0] for name in os.listdir(f0_dir)])
1085
+ & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
1086
+ )
1087
+ else:
1088
+ names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
1089
+ [name.split(".")[0] for name in os.listdir(feature_dir)]
1090
+ )
1091
+ opt = []
1092
+ for name in names:
1093
+ if if_f0_3:
1094
+ opt.append(
1095
+ "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
1096
+ % (
1097
+ gt_wavs_dir.replace("\\", "\\\\"),
1098
+ name,
1099
+ feature_dir.replace("\\", "\\\\"),
1100
+ name,
1101
+ f0_dir.replace("\\", "\\\\"),
1102
+ name,
1103
+ f0nsf_dir.replace("\\", "\\\\"),
1104
+ name,
1105
+ spk_id5,
1106
+ )
1107
+ )
1108
+ else:
1109
+ opt.append(
1110
+ "%s/%s.wav|%s/%s.npy|%s"
1111
+ % (
1112
+ gt_wavs_dir.replace("\\", "\\\\"),
1113
+ name,
1114
+ feature_dir.replace("\\", "\\\\"),
1115
+ name,
1116
+ spk_id5,
1117
+ )
1118
+ )
1119
+ fea_dim = 256 if version19 == "v1" else 768
1120
+ if if_f0_3:
1121
+ for _ in range(2):
1122
+ opt.append(
1123
+ "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
1124
+ % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
1125
+ )
1126
+ else:
1127
+ for _ in range(2):
1128
+ opt.append(
1129
+ "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
1130
+ % (now_dir, sr2, now_dir, fea_dim, spk_id5)
1131
+ )
1132
+ shuffle(opt)
1133
+ with open("%s/filelist.txt" % model_log_dir, "w") as f:
1134
+ f.write("\n".join(opt))
1135
+ yield get_info_str("write filelist done")
1136
+ if gpus16:
1137
+ cmd = (
1138
+ config.python_cmd
1139
+ + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
1140
+ % (
1141
+ exp_dir1,
1142
+ sr2,
1143
+ 1 if if_f0_3 else 0,
1144
+ batch_size12,
1145
+ gpus16,
1146
+ total_epoch11,
1147
+ save_epoch10,
1148
+ "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
1149
+ "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
1150
+ 1 if if_save_latest13 == i18n("是") else 0,
1151
+ 1 if if_cache_gpu17 == i18n("是") else 0,
1152
+ 1 if if_save_every_weights18 == i18n("是") else 0,
1153
+ version19,
1154
+ )
1155
+ )
1156
+ else:
1157
+ cmd = (
1158
+ config.python_cmd
1159
+ + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
1160
+ % (
1161
+ exp_dir1,
1162
+ sr2,
1163
+ 1 if if_f0_3 else 0,
1164
+ batch_size12,
1165
+ total_epoch11,
1166
+ save_epoch10,
1167
+ "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
1168
+ "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
1169
+ 1 if if_save_latest13 == i18n("是") else 0,
1170
+ 1 if if_cache_gpu17 == i18n("是") else 0,
1171
+ 1 if if_save_every_weights18 == i18n("是") else 0,
1172
+ version19,
1173
+ )
1174
+ )
1175
+ yield get_info_str(cmd)
1176
+ p = Popen(cmd, shell=True, cwd=now_dir)
1177
+ p.wait()
1178
+ yield get_info_str(i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"))
1179
+ #######step3b: train the index
1180
+ npys = []
1181
+ listdir_res = list(os.listdir(feature_dir))
1182
+ for name in sorted(listdir_res):
1183
+ phone = np.load("%s/%s" % (feature_dir, name))
1184
+ npys.append(phone)
1185
+ big_npy = np.concatenate(npys, 0)
1186
+
1187
+ big_npy_idx = np.arange(big_npy.shape[0])
1188
+ np.random.shuffle(big_npy_idx)
1189
+ big_npy = big_npy[big_npy_idx]
1190
+
1191
+ if big_npy.shape[0] > 2e5:
1192
+ # if(1):
1193
+ info = "Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0]
1194
+ print(info)
1195
+ yield get_info_str(info)
1196
+ try:
1197
+ big_npy = (
1198
+ MiniBatchKMeans(
1199
+ n_clusters=10000,
1200
+ verbose=True,
1201
+ batch_size=256 * config.n_cpu,
1202
+ compute_labels=False,
1203
+ init="random",
1204
+ )
1205
+ .fit(big_npy)
1206
+ .cluster_centers_
1207
+ )
1208
+ except:
1209
+ info = traceback.format_exc()
1210
+ print(info)
1211
+ yield get_info_str(info)
1212
+
1213
+ np.save("%s/total_fea.npy" % model_log_dir, big_npy)
1214
+
1215
+ # n_ivf = big_npy.shape[0] // 39
1216
+ n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
1217
+ yield get_info_str("%s,%s" % (big_npy.shape, n_ivf))
1218
+ index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
1219
+ yield get_info_str("training index")
1220
+ index_ivf = faiss.extract_index_ivf(index) #
1221
+ index_ivf.nprobe = 1
1222
+ index.train(big_npy)
1223
+ faiss.write_index(
1224
+ index,
1225
+ "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
1226
+ % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
1227
+ )
1228
+ yield get_info_str("adding index")
1229
+ batch_size_add = 8192
1230
+ for i in range(0, big_npy.shape[0], batch_size_add):
1231
+ index.add(big_npy[i : i + batch_size_add])
1232
+ faiss.write_index(
1233
+ index,
1234
+ "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
1235
+ % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
1236
+ )
1237
+ yield get_info_str(
1238
+ "成功构建索引, added_IVF%s_Flat_nprobe_%s_%s_%s.index"
1239
+ % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
1240
+ )
1241
+ yield get_info_str(i18n("全流程结束!"))
1242
+
1243
+
1244
+ # ckpt_path2.change(change_info_,[ckpt_path2],[sr__,if_f0__])
1245
+ def change_info_(ckpt_path):
1246
+ if not os.path.exists(ckpt_path.replace(os.path.basename(ckpt_path), "train.log")):
1247
+ return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
1248
+ try:
1249
+ with open(
1250
+ ckpt_path.replace(os.path.basename(ckpt_path), "train.log"), "r"
1251
+ ) as f:
1252
+ info = eval(f.read().strip("\n").split("\n")[0].split("\t")[-1])
1253
+ sr, f0 = info["sample_rate"], info["if_f0"]
1254
+ version = "v2" if ("version" in info and info["version"] == "v2") else "v1"
1255
+ return sr, str(f0), version
1256
+ except:
1257
+ traceback.print_exc()
1258
+ return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
1259
+
1260
+
1261
+ def export_onnx(ModelPath, ExportedPath):
1262
+ cpt = torch.load(ModelPath, map_location="cpu")
1263
+ cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
1264
+ vec_channels = 256 if cpt.get("version", "v1") == "v1" else 768
1265
+
1266
+ test_phone = torch.rand(1, 200, vec_channels) # hidden unit
1267
+ test_phone_lengths = torch.tensor([200]).long() # hidden unit length (seemingly unused)
1268
+ test_pitch = torch.randint(size=(1, 200), low=5, high=255) # f0 (in Hz)
1269
+ test_pitchf = torch.rand(1, 200) # NSF f0
1270
+ test_ds = torch.LongTensor([0]) # speaker ID
1271
+ test_rnd = torch.rand(1, 192, 200) # noise (adds a random factor)
1272
+
1273
+ device = "cpu" # 导出时设备(不影响使用模型)
1274
+
1275
+ net_g = SynthesizerTrnMsNSFsidM(
1276
+ *cpt["config"], is_half=False, version=cpt.get("version", "v1")
1277
+ ) # fp32 export (fp16 support in C++ would require manual memory re-layout, so fp16 is not used for now)
1278
+ net_g.load_state_dict(cpt["weight"], strict=False)
1279
+ input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
1280
+ output_names = [
1281
+ "audio",
1282
+ ]
1283
+ # net_g.construct_spkmixmap(n_speaker)  # multi-speaker mix-track export
1284
+ torch.onnx.export(
1285
+ net_g,
1286
+ (
1287
+ test_phone.to(device),
1288
+ test_phone_lengths.to(device),
1289
+ test_pitch.to(device),
1290
+ test_pitchf.to(device),
1291
+ test_ds.to(device),
1292
+ test_rnd.to(device),
1293
+ ),
1294
+ ExportedPath,
1295
+ dynamic_axes={
1296
+ "phone": [1],
1297
+ "pitch": [1],
1298
+ "pitchf": [1],
1299
+ "rnd": [2],
1300
+ },
1301
+ do_constant_folding=False,
1302
+ opset_version=13,
1303
+ verbose=False,
1304
+ input_names=input_names,
1305
+ output_names=output_names,
1306
+ )
1307
+ return "Finished"
1308
+
1309
+ # sid0: Inferencing voice/ model name
1310
+ # f0_up_key: Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12):
1311
+ # opt_root: output folder path
1312
+ # f0method: pitch extraction algorithm ('pm': faster extraction but lower-quality speech; 'harvest': better bass but extremely slow; 'crepe': better quality but GPU intensive)
1313
+ # filter_radius: If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness.
1314
+ # file_index: Path to the feature index file.
1315
+ # Auto-detect index path
1316
+ # index_rate: Search feature ratio (controls accent strength, too high has artifacting): minimum=0, maximum=1
1317
+ # resample_sr: Resample the output audio in post-processing to the final sample rate. Set to 0 for no resampling (time consuming)
1318
+ # rms_mix_rate: Adjust the volume envelope scaling. Closer to 0, the more it mimics the volume of the original vocals. Can help mask noise and make volume sound more natural when set relatively low. Closer to 1 will be more of a consistently loud volume:
1319
+ # protect: Protect voiceless consonants and breath sounds to prevent artifacts such as tearing in electronic music. Set to 0.5 to disable. Decrease the value to increase protection, but it may reduce indexing accuracy
1320
+ # dir_path: Enter the path of the audio folder to be processed (copy it from the address bar of the file manager)
1321
+ # format1: choices=["wav", "flac", "mp3", "m4a"]
1322
+
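+ # Hedged usage sketch for the parameters documented above (assumed file names,
+ # not part of the original commit):
+ # run("my_model.pth", "input_vocals.wav", f0_up_key=-12, f0_method="harvest",
+ #     index_rate=0.7, protect=0.33, format1="flac")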
1323
+ def run(sid0, paths, dir_path=None, f0_up_key=0, opt_root="opt", f0_method="pm", filter_radius=3, file_index="", file_index2=None, index_rate=1, resample_sr=0, rms_mix_rate=1, protect=0.33, format1="wav"):
1324
+ if not dir_path and not paths:
1325
+ return "must provide either dir_input or file path"
1326
+ if paths:  # fixed: the original `paths != None or paths != ''` was always true
1327
+ temp_files = [file_to_tempfile(paths)]  # renamed so it no longer shadows the tempfile module
1328
+ print(paths)
1329
+ print(protect)
1330
+ get_vc(sid0, protect, protect)
1331
+
1332
+ vc_output3 = vc_multi(
1333
+ 0, # sid: 0
1334
+ dir_path, # dir_path:
1335
+ opt_root, # opt_root: opt
1336
+ temp_files, # paths: [<tempfile._TemporaryFileWrapper object at 0x7f42c7dbb970>]
1337
+ f0_up_key, # f0_up_key: -12.0
1338
+ f0_method, # f0_method: pm
1339
+ file_index, # file_index:
1340
+ file_index2, # file_index2:
1341
+ index_rate, # index_rate: 1
1342
+ filter_radius, # filter_radius: 3
1343
+ resample_sr, # resample_sr: 0
1344
+ rms_mix_rate, # rms_mix_rate: 1
1345
+ protect, # protect: 0.33
1346
+ format1 # format1: wav
1347
+ )
1348
+ out_path = paths
1349
+ wavfile.write(out_path, tgt_sr, vc_output3)
1350
+
1351
+ return vc_output3
1352
+
1353
+ def get_models():
1354
+ return names
1355
+
1356
+
1357
+ def file_to_tempfile(file_path):
1358
+ with open(file_path, 'rb') as file:
1359
+ temp_file = tempfile.TemporaryFile()
1360
+ temp_file.write(file.read())
1361
+ temp_file.seek(0)
1362
+ return temp_file
1363
+
1364
+ print(run('mymodelimran.pth', '/home/teewhy/Desktop/RVC/Retrieval-based-Voice-Conversion-WebUI/opt/abcxot47ylz.mp3.mp3'))
1365
test.py ADDED
@@ -0,0 +1,5 @@
1
+ import RVC_class
2
+ converter = RVC_class.VoiceConverter()
3
+ result = converter.single_run('https://tmpfiles.org/dl/1669357/recordonline-voice-recorder.com2.wav', 'imran_khan.pth', -12, False)
4
+
5
+ print(result)
train_nsf_sim_cache_sid_load_pretrain.py ADDED
@@ -0,0 +1,595 @@
1
+ import sys, os
2
+
3
+ now_dir = os.getcwd()
4
+ sys.path.append(os.path.join(now_dir))
5
+ sys.path.append(os.path.join(now_dir, "train"))
6
+ import utils
7
+ import datetime
8
+
9
+ hps = utils.get_hparams()
10
+ os.environ["CUDA_VISIBLE_DEVICES"] = hps.gpus.replace("-", ",")
11
+ n_gpus = len(hps.gpus.split("-"))
12
+ from random import shuffle, randint
13
+ import traceback, json, argparse, itertools, math, torch, pdb
14
+
15
+ torch.backends.cudnn.deterministic = False
16
+ torch.backends.cudnn.benchmark = False
17
+ from torch import nn, optim
18
+ from torch.nn import functional as F
19
+ from torch.utils.data import DataLoader
20
+ from torch.utils.tensorboard import SummaryWriter
21
+ import torch.multiprocessing as mp
22
+ import torch.distributed as dist
23
+ from torch.nn.parallel import DistributedDataParallel as DDP
24
+ from torch.cuda.amp import autocast, GradScaler
25
+ from infer_pack import commons
26
+ from time import sleep
27
+ from time import time as ttime
28
+ from data_utils import (
29
+ TextAudioLoaderMultiNSFsid,
30
+ TextAudioLoader,
31
+ TextAudioCollateMultiNSFsid,
32
+ TextAudioCollate,
33
+ DistributedBucketSampler,
34
+ )
35
+
36
+ if hps.version == "v1":
37
+ from infer_pack.models import (
38
+ SynthesizerTrnMs256NSFsid as RVC_Model_f0,
39
+ SynthesizerTrnMs256NSFsid_nono as RVC_Model_nof0,
40
+ MultiPeriodDiscriminator,
41
+ )
42
+ else:
43
+ from infer_pack.models import (
44
+ SynthesizerTrnMs768NSFsid as RVC_Model_f0,
45
+ SynthesizerTrnMs768NSFsid_nono as RVC_Model_nof0,
46
+ MultiPeriodDiscriminatorV2 as MultiPeriodDiscriminator,
47
+ )
48
+ from losses import generator_loss, discriminator_loss, feature_loss, kl_loss
49
+ from mel_processing import mel_spectrogram_torch, spec_to_mel_torch
50
+ from process_ckpt import savee
51
+
52
+ global_step = 0
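+ # Hedged CLI sketch (mirrors the cmd strings assembled by the web UI code above,
+ # not part of the original commit):
+ # python train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 \
+ #     -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0 -sw 0 -v v1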
53
+
54
+
55
+ class EpochRecorder:
56
+ def __init__(self):
57
+ self.last_time = ttime()
58
+
59
+ def record(self):
60
+ now_time = ttime()
61
+ elapsed_time = now_time - self.last_time
62
+ self.last_time = now_time
63
+ elapsed_time_str = str(datetime.timedelta(seconds=elapsed_time))
64
+ current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
65
+ return f"[{current_time}] | ({elapsed_time_str})"
66
+
67
+
68
+ def main():
69
+ n_gpus = torch.cuda.device_count()
70
+ if not torch.cuda.is_available() and torch.backends.mps.is_available():
71
+ n_gpus = 1
72
+ os.environ["MASTER_ADDR"] = "localhost"
73
+ os.environ["MASTER_PORT"] = str(randint(20000, 55555))
74
+ children = []
75
+ for i in range(n_gpus):
76
+ subproc = mp.Process(
77
+ target=run,
78
+ args=(
79
+ i,
80
+ n_gpus,
81
+ hps,
82
+ ),
83
+ )
84
+ children.append(subproc)
85
+ subproc.start()
86
+
87
+ for i in range(n_gpus):
88
+ children[i].join()
89
+
90
+
91
+ def run(rank, n_gpus, hps):
92
+ global global_step
93
+ if rank == 0:
94
+ logger = utils.get_logger(hps.model_dir)
95
+ logger.info(hps)
96
+ # utils.check_git_hash(hps.model_dir)
97
+ writer = SummaryWriter(log_dir=hps.model_dir)
98
+ writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))
99
+
100
+ dist.init_process_group(
101
+ backend="gloo", init_method="env://", world_size=n_gpus, rank=rank
102
+ )
103
+ torch.manual_seed(hps.train.seed)
104
+ if torch.cuda.is_available():
105
+ torch.cuda.set_device(rank)
106
+
107
+ if hps.if_f0 == 1:
108
+ train_dataset = TextAudioLoaderMultiNSFsid(hps.data.training_files, hps.data)
109
+ else:
110
+ train_dataset = TextAudioLoader(hps.data.training_files, hps.data)
111
+ train_sampler = DistributedBucketSampler(
112
+ train_dataset,
113
+ hps.train.batch_size * n_gpus,
114
+ # [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1200,1400], # 16s
115
+ [100, 200, 300, 400, 500, 600, 700, 800, 900], # 16s
116
+ num_replicas=n_gpus,
117
+ rank=rank,
118
+ shuffle=True,
119
+ )
120
+ # It is possible that dataloader's workers are out of shared memory. Please try to raise your shared memory limit.
121
+ # num_workers=8 -> num_workers=4
122
+ if hps.if_f0 == 1:
123
+ collate_fn = TextAudioCollateMultiNSFsid()
124
+ else:
125
+ collate_fn = TextAudioCollate()
126
+ train_loader = DataLoader(
127
+ train_dataset,
128
+ num_workers=4,
129
+ shuffle=False,
130
+ pin_memory=True,
131
+ collate_fn=collate_fn,
132
+ batch_sampler=train_sampler,
133
+ persistent_workers=True,
134
+ prefetch_factor=8,
135
+ )
136
+ if hps.if_f0 == 1:
137
+ net_g = RVC_Model_f0(
138
+ hps.data.filter_length // 2 + 1,
139
+ hps.train.segment_size // hps.data.hop_length,
140
+ **hps.model,
141
+ is_half=hps.train.fp16_run,
142
+ sr=hps.sample_rate,
143
+ )
144
+ else:
145
+ net_g = RVC_Model_nof0(
146
+ hps.data.filter_length // 2 + 1,
147
+ hps.train.segment_size // hps.data.hop_length,
148
+ **hps.model,
149
+ is_half=hps.train.fp16_run,
150
+ )
151
+ if torch.cuda.is_available():
152
+ net_g = net_g.cuda(rank)
153
+ net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm)
154
+ if torch.cuda.is_available():
155
+ net_d = net_d.cuda(rank)
156
+ optim_g = torch.optim.AdamW(
157
+ net_g.parameters(),
158
+ hps.train.learning_rate,
159
+ betas=hps.train.betas,
160
+ eps=hps.train.eps,
161
+ )
162
+ optim_d = torch.optim.AdamW(
163
+ net_d.parameters(),
164
+ hps.train.learning_rate,
165
+ betas=hps.train.betas,
166
+ eps=hps.train.eps,
167
+ )
168
+ # net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
169
+ # net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
170
+ if torch.cuda.is_available():
171
+ net_g = DDP(net_g, device_ids=[rank])
172
+ net_d = DDP(net_d, device_ids=[rank])
173
+ else:
174
+ net_g = DDP(net_g)
175
+ net_d = DDP(net_d)
176
+
177
+ try: # auto-resume if a checkpoint can be loaded
178
+ _, _, _, epoch_str = utils.load_checkpoint(
179
+ utils.latest_checkpoint_path(hps.model_dir, "D_*.pth"), net_d, optim_d
180
+ ) # loading D usually works fine
181
+ if rank == 0:
182
+ logger.info("loaded D")
183
+ # _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "G_*.pth"), net_g, optim_g,load_opt=0)
184
+ _, _, _, epoch_str = utils.load_checkpoint(
185
+ utils.latest_checkpoint_path(hps.model_dir, "G_*.pth"), net_g, optim_g
186
+ )
187
+ global_step = (epoch_str - 1) * len(train_loader)
188
+ # epoch_str = 1
189
+ # global_step = 0
190
+ except: # if nothing can be loaded on the first run, load the pretrained weights
191
+ # traceback.print_exc()
192
+ epoch_str = 1
193
+ global_step = 0
194
+ if hps.pretrainG != "":
195
+ if rank == 0:
196
+ logger.info("loaded pretrained %s" % (hps.pretrainG))
197
+ print(
198
+ net_g.module.load_state_dict(
199
+ torch.load(hps.pretrainG, map_location="cpu")["model"]
200
+ )
201
+ ) ## test: do not load the optimizer
202
+ if hps.pretrainD != "":
203
+ if rank == 0:
204
+ logger.info("loaded pretrained %s" % (hps.pretrainD))
205
+ print(
206
+ net_d.module.load_state_dict(
207
+ torch.load(hps.pretrainD, map_location="cpu")["model"]
208
+ )
209
+ )
210
+
211
+ scheduler_g = torch.optim.lr_scheduler.ExponentialLR(
212
+ optim_g, gamma=hps.train.lr_decay, last_epoch=epoch_str - 2
213
+ )
214
+ scheduler_d = torch.optim.lr_scheduler.ExponentialLR(
215
+ optim_d, gamma=hps.train.lr_decay, last_epoch=epoch_str - 2
216
+ )
217
+
218
+ scaler = GradScaler(enabled=hps.train.fp16_run)
219
+
220
+ cache = []
221
+ for epoch in range(epoch_str, hps.train.epochs + 1):
222
+ if rank == 0:
223
+ train_and_evaluate(
224
+ rank,
225
+ epoch,
226
+ hps,
227
+ [net_g, net_d],
228
+ [optim_g, optim_d],
229
+ [scheduler_g, scheduler_d],
230
+ scaler,
231
+ [train_loader, None],
232
+ logger,
233
+ [writer, writer_eval],
234
+ cache,
235
+ )
236
+ else:
237
+ train_and_evaluate(
238
+ rank,
239
+ epoch,
240
+ hps,
241
+ [net_g, net_d],
242
+ [optim_g, optim_d],
243
+ [scheduler_g, scheduler_d],
244
+ scaler,
245
+ [train_loader, None],
246
+ None,
247
+ None,
248
+ cache,
249
+ )
250
+ scheduler_g.step()
251
+ scheduler_d.step()
252
+
253
+
254
+ def train_and_evaluate(
255
+ rank, epoch, hps, nets, optims, schedulers, scaler, loaders, logger, writers, cache
256
+ ):
257
+ net_g, net_d = nets
258
+ optim_g, optim_d = optims
259
+ train_loader, eval_loader = loaders
260
+ if writers is not None:
261
+ writer, writer_eval = writers
262
+
263
+ train_loader.batch_sampler.set_epoch(epoch)
264
+ global global_step
265
+
266
+ net_g.train()
267
+ net_d.train()
268
+
269
+ # Prepare data iterator
270
+ if hps.if_cache_data_in_gpu == True:
271
+ # Use Cache
272
+ data_iterator = cache
273
+ if cache == []:
274
+ # Make new cache
275
+ for batch_idx, info in enumerate(train_loader):
276
+ # Unpack
277
+ if hps.if_f0 == 1:
278
+ (
279
+ phone,
280
+ phone_lengths,
281
+ pitch,
282
+ pitchf,
283
+ spec,
284
+ spec_lengths,
285
+ wave,
286
+ wave_lengths,
287
+ sid,
288
+ ) = info
289
+ else:
290
+ (
291
+ phone,
292
+ phone_lengths,
293
+ spec,
294
+ spec_lengths,
295
+ wave,
296
+ wave_lengths,
297
+ sid,
298
+ ) = info
299
+ # Load on CUDA
300
+ if torch.cuda.is_available():
301
+ phone = phone.cuda(rank, non_blocking=True)
302
+ phone_lengths = phone_lengths.cuda(rank, non_blocking=True)
303
+ if hps.if_f0 == 1:
304
+ pitch = pitch.cuda(rank, non_blocking=True)
305
+ pitchf = pitchf.cuda(rank, non_blocking=True)
306
+ sid = sid.cuda(rank, non_blocking=True)
307
+ spec = spec.cuda(rank, non_blocking=True)
308
+ spec_lengths = spec_lengths.cuda(rank, non_blocking=True)
309
+ wave = wave.cuda(rank, non_blocking=True)
310
+ wave_lengths = wave_lengths.cuda(rank, non_blocking=True)
311
+ # Cache on list
312
+ if hps.if_f0 == 1:
313
+ cache.append(
314
+ (
315
+ batch_idx,
316
+ (
317
+ phone,
318
+ phone_lengths,
319
+ pitch,
320
+ pitchf,
321
+ spec,
322
+ spec_lengths,
323
+ wave,
324
+ wave_lengths,
325
+ sid,
326
+ ),
327
+ )
328
+ )
329
+ else:
330
+ cache.append(
331
+ (
332
+ batch_idx,
333
+ (
334
+ phone,
335
+ phone_lengths,
336
+ spec,
337
+ spec_lengths,
338
+ wave,
339
+ wave_lengths,
340
+ sid,
341
+ ),
342
+ )
343
+ )
344
+ else:
345
+ # Load shuffled cache
346
+ shuffle(cache)
347
+ else:
348
+ # Loader
349
+ data_iterator = enumerate(train_loader)
350
+
351
+ # Run steps
352
+ epoch_recorder = EpochRecorder()
353
+ for batch_idx, info in data_iterator:
354
+ # Data
355
+ ## Unpack
356
+ if hps.if_f0 == 1:
357
+ (
358
+ phone,
359
+ phone_lengths,
360
+ pitch,
361
+ pitchf,
362
+ spec,
363
+ spec_lengths,
364
+ wave,
365
+ wave_lengths,
366
+ sid,
367
+ ) = info
368
+ else:
369
+ phone, phone_lengths, spec, spec_lengths, wave, wave_lengths, sid = info
370
+ ## Load on CUDA
371
+ if (hps.if_cache_data_in_gpu == False) and torch.cuda.is_available():
372
+ phone = phone.cuda(rank, non_blocking=True)
373
+ phone_lengths = phone_lengths.cuda(rank, non_blocking=True)
374
+ if hps.if_f0 == 1:
375
+ pitch = pitch.cuda(rank, non_blocking=True)
376
+ pitchf = pitchf.cuda(rank, non_blocking=True)
377
+ sid = sid.cuda(rank, non_blocking=True)
378
+ spec = spec.cuda(rank, non_blocking=True)
379
+ spec_lengths = spec_lengths.cuda(rank, non_blocking=True)
380
+ wave = wave.cuda(rank, non_blocking=True)
381
+ # wave_lengths = wave_lengths.cuda(rank, non_blocking=True)
382
+
383
+ # Calculate
384
+ with autocast(enabled=hps.train.fp16_run):
385
+ if hps.if_f0 == 1:
386
+ (
387
+ y_hat,
388
+ ids_slice,
389
+ x_mask,
390
+ z_mask,
391
+ (z, z_p, m_p, logs_p, m_q, logs_q),
392
+ ) = net_g(phone, phone_lengths, pitch, pitchf, spec, spec_lengths, sid)
393
+ else:
394
+ (
395
+ y_hat,
396
+ ids_slice,
397
+ x_mask,
398
+ z_mask,
399
+ (z, z_p, m_p, logs_p, m_q, logs_q),
400
+ ) = net_g(phone, phone_lengths, spec, spec_lengths, sid)
401
+ mel = spec_to_mel_torch(
402
+ spec,
403
+ hps.data.filter_length,
404
+ hps.data.n_mel_channels,
405
+ hps.data.sampling_rate,
406
+ hps.data.mel_fmin,
407
+ hps.data.mel_fmax,
408
+ )
409
+ y_mel = commons.slice_segments(
410
+ mel, ids_slice, hps.train.segment_size // hps.data.hop_length
411
+ )
412
+ with autocast(enabled=False):
413
+ y_hat_mel = mel_spectrogram_torch(
414
+ y_hat.float().squeeze(1),
415
+ hps.data.filter_length,
416
+ hps.data.n_mel_channels,
417
+ hps.data.sampling_rate,
418
+ hps.data.hop_length,
419
+ hps.data.win_length,
420
+ hps.data.mel_fmin,
421
+ hps.data.mel_fmax,
422
+ )
423
+ if hps.train.fp16_run == True:
424
+ y_hat_mel = y_hat_mel.half()
425
+ wave = commons.slice_segments(
426
+ wave, ids_slice * hps.data.hop_length, hps.train.segment_size
427
+ ) # slice
428
+
429
+ # Discriminator
430
+ y_d_hat_r, y_d_hat_g, _, _ = net_d(wave, y_hat.detach())
431
+ with autocast(enabled=False):
432
+ loss_disc, losses_disc_r, losses_disc_g = discriminator_loss(
433
+ y_d_hat_r, y_d_hat_g
434
+ )
435
+ optim_d.zero_grad()
436
+ scaler.scale(loss_disc).backward()
437
+ scaler.unscale_(optim_d)
438
+ grad_norm_d = commons.clip_grad_value_(net_d.parameters(), None)
439
+ scaler.step(optim_d)
440
+
441
+ with autocast(enabled=hps.train.fp16_run):
442
+ # Generator
443
+ y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = net_d(wave, y_hat)
444
+ with autocast(enabled=False):
445
+ loss_mel = F.l1_loss(y_mel, y_hat_mel) * hps.train.c_mel
446
+ loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * hps.train.c_kl
447
+ loss_fm = feature_loss(fmap_r, fmap_g)
448
+ loss_gen, losses_gen = generator_loss(y_d_hat_g)
449
+ loss_gen_all = loss_gen + loss_fm + loss_mel + loss_kl
450
+ optim_g.zero_grad()
451
+ scaler.scale(loss_gen_all).backward()
452
+ scaler.unscale_(optim_g)
453
+ grad_norm_g = commons.clip_grad_value_(net_g.parameters(), None)
454
+ scaler.step(optim_g)
455
+ scaler.update()
456
+
457
+ if rank == 0:
458
+ if global_step % hps.train.log_interval == 0:
459
+ lr = optim_g.param_groups[0]["lr"]
460
+ logger.info(
461
+ "Train Epoch: {} [{:.0f}%]".format(
462
+ epoch, 100.0 * batch_idx / len(train_loader)
463
+ )
464
+ )
465
+ # Amor For Tensorboard display
466
+ if loss_mel > 75:
467
+ loss_mel = 75
468
+ if loss_kl > 9:
469
+ loss_kl = 9
470
+
471
+ logger.info([global_step, lr])
472
+ logger.info(
473
+ f"loss_disc={loss_disc:.3f}, loss_gen={loss_gen:.3f}, loss_fm={loss_fm:.3f},loss_mel={loss_mel:.3f}, loss_kl={loss_kl:.3f}"
474
+ )
475
+ scalar_dict = {
476
+ "loss/g/total": loss_gen_all,
477
+ "loss/d/total": loss_disc,
478
+ "learning_rate": lr,
479
+ "grad_norm_d": grad_norm_d,
480
+ "grad_norm_g": grad_norm_g,
481
+ }
482
+ scalar_dict.update(
483
+ {
484
+ "loss/g/fm": loss_fm,
485
+ "loss/g/mel": loss_mel,
486
+ "loss/g/kl": loss_kl,
487
+ }
488
+ )
489
+
490
+ scalar_dict.update(
491
+ {"loss/g/{}".format(i): v for i, v in enumerate(losses_gen)}
492
+ )
493
+ scalar_dict.update(
494
+ {"loss/d_r/{}".format(i): v for i, v in enumerate(losses_disc_r)}
495
+ )
496
+ scalar_dict.update(
497
+ {"loss/d_g/{}".format(i): v for i, v in enumerate(losses_disc_g)}
498
+ )
499
+ image_dict = {
500
+ "slice/mel_org": utils.plot_spectrogram_to_numpy(
501
+ y_mel[0].data.cpu().numpy()
502
+ ),
503
+ "slice/mel_gen": utils.plot_spectrogram_to_numpy(
504
+ y_hat_mel[0].data.cpu().numpy()
505
+ ),
506
+ "all/mel": utils.plot_spectrogram_to_numpy(
507
+ mel[0].data.cpu().numpy()
508
+ ),
509
+ }
510
+ utils.summarize(
511
+ writer=writer,
512
+ global_step=global_step,
513
+ images=image_dict,
514
+ scalars=scalar_dict,
515
+ )
516
+ global_step += 1
517
+ # /Run steps
518
+
519
+ if epoch % hps.save_every_epoch == 0 and rank == 0:
520
+ if hps.if_latest == 0:
521
+ utils.save_checkpoint(
522
+ net_g,
523
+ optim_g,
524
+ hps.train.learning_rate,
525
+ epoch,
526
+ os.path.join(hps.model_dir, "G_{}.pth".format(global_step)),
527
+ )
528
+ utils.save_checkpoint(
529
+ net_d,
530
+ optim_d,
531
+ hps.train.learning_rate,
532
+ epoch,
533
+ os.path.join(hps.model_dir, "D_{}.pth".format(global_step)),
534
+ )
535
+ else:
536
+ utils.save_checkpoint(
537
+ net_g,
538
+ optim_g,
539
+ hps.train.learning_rate,
540
+ epoch,
541
+ os.path.join(hps.model_dir, "G_{}.pth".format(2333333)),
542
+ )
543
+ utils.save_checkpoint(
544
+ net_d,
545
+ optim_d,
546
+ hps.train.learning_rate,
547
+ epoch,
548
+ os.path.join(hps.model_dir, "D_{}.pth".format(2333333)),
549
+ )
550
+ if rank == 0 and hps.save_every_weights == "1":
551
+ if hasattr(net_g, "module"):
552
+ ckpt = net_g.module.state_dict()
553
+ else:
554
+ ckpt = net_g.state_dict()
555
+ logger.info(
556
+ "saving ckpt %s_e%s:%s"
557
+ % (
558
+ hps.name,
559
+ epoch,
560
+ savee(
561
+ ckpt,
562
+ hps.sample_rate,
563
+ hps.if_f0,
564
+ hps.name + "_e%s_s%s" % (epoch, global_step),
565
+ epoch,
566
+ hps.version,
567
+ hps,
568
+ ),
569
+ )
570
+ )
571
+
572
+ if rank == 0:
573
+ logger.info("====> Epoch: {} {}".format(epoch, epoch_recorder.record()))
574
+ if epoch >= hps.total_epoch and rank == 0:
575
+ logger.info("Training is done. The program is closed.")
576
+
577
+ if hasattr(net_g, "module"):
578
+ ckpt = net_g.module.state_dict()
579
+ else:
580
+ ckpt = net_g.state_dict()
581
+ logger.info(
582
+ "saving final ckpt:%s"
583
+ % (
584
+ savee(
585
+ ckpt, hps.sample_rate, hps.if_f0, hps.name, epoch, hps.version, hps
586
+ )
587
+ )
588
+ )
589
+ sleep(1)
590
+ os._exit(2333333)
591
+
592
+
593
+ if __name__ == "__main__":
594
+ torch.multiprocessing.set_start_method("spawn")
595
+ main()
trainset_preprocess_pipeline_print.py ADDED
@@ -0,0 +1,139 @@
1
+ import sys, os, multiprocessing
2
+ from scipy import signal
3
+
4
+ now_dir = os.getcwd()
5
+ sys.path.append(now_dir)
6
+
7
+ inp_root = sys.argv[1]
8
+ sr = int(sys.argv[2])
9
+ n_p = int(sys.argv[3])
10
+ exp_dir = sys.argv[4]
11
+ noparallel = sys.argv[5] == "True"
12
+ import numpy as np, traceback
13
+ from slicer2 import Slicer
14
+ import librosa
15
+ from scipy.io import wavfile
17
+ from my_utils import load_audio
18
+
19
+ mutex = multiprocessing.Lock()
20
+ f = open("%s/preprocess.log" % exp_dir, "a+")
21
+
22
+
23
+ def println(strr):
24
+ mutex.acquire()
25
+ print(strr)
26
+ f.write("%s\n" % strr)
27
+ f.flush()
28
+ mutex.release()
29
+
30
+
31
+ class PreProcess:
32
+ def __init__(self, sr, exp_dir):
33
+ self.slicer = Slicer(
34
+ sr=sr,
35
+ threshold=-42,
36
+ min_length=1500,
37
+ min_interval=400,
38
+ hop_size=15,
39
+ max_sil_kept=500,
40
+ )
41
+ self.sr = sr
42
+ self.bh, self.ah = signal.butter(N=5, Wn=48, btype="high", fs=self.sr)
43
+ self.per = 3.0
44
+ self.overlap = 0.3
45
+ self.tail = self.per + self.overlap
46
+ self.max = 0.9
47
+ self.alpha = 0.75
48
+ self.exp_dir = exp_dir
49
+ self.gt_wavs_dir = "%s/0_gt_wavs" % exp_dir
50
+ self.wavs16k_dir = "%s/1_16k_wavs" % exp_dir
51
+ os.makedirs(self.exp_dir, exist_ok=True)
52
+ os.makedirs(self.gt_wavs_dir, exist_ok=True)
53
+ os.makedirs(self.wavs16k_dir, exist_ok=True)
54
+
55
+ def norm_write(self, tmp_audio, idx0, idx1):
56
+ tmp_max = np.abs(tmp_audio).max()
57
+ if tmp_max > 2.5:
58
+ print("%s-%s-%s-filtered" % (idx0, idx1, tmp_max))
59
+ return
60
+ tmp_audio = (tmp_audio / tmp_max * (self.max * self.alpha)) + (
61
+ 1 - self.alpha
62
+ ) * tmp_audio
63
+ wavfile.write(
64
+ "%s/%s_%s.wav" % (self.gt_wavs_dir, idx0, idx1),
65
+ self.sr,
66
+ tmp_audio.astype(np.float32),
67
+ )
68
+ tmp_audio = librosa.resample(
69
+ tmp_audio, orig_sr=self.sr, target_sr=16000
70
+ ) # , res_type="soxr_vhq"
71
+ wavfile.write(
72
+ "%s/%s_%s.wav" % (self.wavs16k_dir, idx0, idx1),
73
+ 16000,
74
+ tmp_audio.astype(np.float32),
75
+ )
76
+
77
+ def pipeline(self, path, idx0):
78
+ try:
79
+ audio = load_audio(path, self.sr)
80
+ # zero phased digital filter cause pre-ringing noise...
81
+ # audio = signal.filtfilt(self.bh, self.ah, audio)
82
+ audio = signal.lfilter(self.bh, self.ah, audio)
83
+
84
+ idx1 = 0
85
+ for audio in self.slicer.slice(audio):
86
+ i = 0
87
+ while 1:
88
+ start = int(self.sr * (self.per - self.overlap) * i)
89
+ i += 1
90
+ if len(audio[start:]) > self.tail * self.sr:
91
+ tmp_audio = audio[start : start + int(self.per * self.sr)]
92
+ self.norm_write(tmp_audio, idx0, idx1)
93
+ idx1 += 1
94
+ else:
95
+ tmp_audio = audio[start:]
96
+ idx1 += 1
97
+ break
98
+ self.norm_write(tmp_audio, idx0, idx1)
99
+ println("%s->Suc." % path)
100
+ except:
101
+ println("%s->%s" % (path, traceback.format_exc()))
102
+
103
+ def pipeline_mp(self, infos):
104
+ for path, idx0 in infos:
105
+ self.pipeline(path, idx0)
106
+
107
+ def pipeline_mp_inp_dir(self, inp_root, n_p):
108
+ try:
109
+ infos = [
110
+ ("%s/%s" % (inp_root, name), idx)
111
+ for idx, name in enumerate(sorted(list(os.listdir(inp_root))))
112
+ ]
113
+ if noparallel:
114
+ for i in range(n_p):
115
+ self.pipeline_mp(infos[i::n_p])
116
+ else:
117
+ ps = []
118
+ for i in range(n_p):
119
+ p = multiprocessing.Process(
120
+ target=self.pipeline_mp, args=(infos[i::n_p],)
121
+ )
122
+ ps.append(p)
123
+ p.start()
124
+ for i in range(n_p):
125
+ ps[i].join()
126
+ except:
127
+ println("Fail. %s" % traceback.format_exc())
128
+
129
+
130
+ def preprocess_trainset(inp_root, sr, n_p, exp_dir):
131
+ pp = PreProcess(sr, exp_dir)
132
+ println("start preprocess")
133
+ println(sys.argv)
134
+ pp.pipeline_mp_inp_dir(inp_root, n_p)
135
+ println("end preprocess")
136
+
137
+
138
+ if __name__ == "__main__":
139
+ preprocess_trainset(inp_root, sr, n_p, exp_dir)
vc_infer_pipeline.py ADDED
@@ -0,0 +1,449 @@
1
+ import numpy as np, parselmouth, torch, pdb
2
+ from time import time as ttime
3
+ import torch.nn.functional as F
4
+ import scipy.signal as signal
5
+ import pyworld, os, traceback, faiss, librosa, torchcrepe
7
+ from functools import lru_cache
8
+ import logging
9
+ bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
10
+
11
+ input_audio_path2wav = {}
12
+
13
+
14
+ @lru_cache
15
+ def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
16
+ audio = input_audio_path2wav[input_audio_path]
17
+ f0, t = pyworld.harvest(
18
+ audio,
19
+ fs=fs,
20
+ f0_ceil=f0max,
21
+ f0_floor=f0min,
22
+ frame_period=frame_period,
23
+ )
24
+ f0 = pyworld.stonemask(audio, f0, t, fs)
25
+ return f0
26
+
27
+
28
+ def change_rms(data1, sr1, data2, sr2, rate): # 1 = input audio, 2 = output audio, rate = weight of 2
29
+ # print(data1.max(),data2.max())
30
+ rms1 = librosa.feature.rms(
31
+ y=data1, frame_length=sr1 // 2 * 2, hop_length=sr1 // 2
32
+ ) # one point every half second
33
+ rms2 = librosa.feature.rms(y=data2, frame_length=sr2 // 2 * 2, hop_length=sr2 // 2)
34
+ rms1 = torch.from_numpy(rms1)
35
+ rms1 = F.interpolate(
36
+ rms1.unsqueeze(0), size=data2.shape[0], mode="linear"
37
+ ).squeeze()
38
+ rms2 = torch.from_numpy(rms2)
39
+ rms2 = F.interpolate(
40
+ rms2.unsqueeze(0), size=data2.shape[0], mode="linear"
41
+ ).squeeze()
42
+ rms2 = torch.max(rms2, torch.zeros_like(rms2) + 1e-6)
43
+ data2 *= (
44
+ torch.pow(rms1, torch.tensor(1 - rate))
45
+ * torch.pow(rms2, torch.tensor(rate - 1))
46
+ ).numpy()
47
+ return data2
48
+
49
+
50
+ class VC(object):
51
+ def __init__(self, tgt_sr, config):
52
+ self.x_pad, self.x_query, self.x_center, self.x_max, self.is_half = (
53
+ config.x_pad,
54
+ config.x_query,
55
+ config.x_center,
56
+ config.x_max,
57
+ config.is_half,
58
+ )
59
+ self.sr = 16000 # hubert input sample rate
60
+ self.window = 160 # samples per frame
61
+ self.t_pad = self.sr * self.x_pad # padding added before/after each segment
62
+ self.t_pad_tgt = tgt_sr * self.x_pad
63
+ self.t_pad2 = self.t_pad * 2
64
+ self.t_query = self.sr * self.x_query # query window before/after each cut point
65
+ self.t_center = self.sr * self.x_center # spacing between cut points
66
+ self.t_max = self.sr * self.x_max # duration threshold below which no cut-point search is done
67
+ self.device = config.device
68
+
69
+ def get_f0(
70
+ self,
71
+ input_audio_path,
72
+ x,
73
+ p_len,
74
+ f0_up_key,
75
+ f0_method,
76
+ filter_radius,
77
+ inp_f0=None,
78
+ ):
79
+ logging.info("start get_f0")
80
+ global input_audio_path2wav
81
+ time_step = self.window / self.sr * 1000
82
+ f0_min = 50
83
+ f0_max = 1100
84
+ f0_mel_min = 1127 * np.log(1 + f0_min / 700)
85
+ f0_mel_max = 1127 * np.log(1 + f0_max / 700)
86
+ logging.info("before if_method == pm")
87
+ if f0_method == "pm":
88
+ f0 = (
89
+ parselmouth.Sound(x, self.sr)
90
+ .to_pitch_ac(
91
+ time_step=time_step / 1000,
92
+ voicing_threshold=0.6,
93
+ pitch_floor=f0_min,
94
+ pitch_ceiling=f0_max,
95
+ )
96
+ .selected_array["frequency"]
97
+ )
98
+ pad_size = (p_len - len(f0) + 1) // 2
99
+ if pad_size > 0 or p_len - len(f0) - pad_size > 0:
100
+ f0 = np.pad(
101
+ f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
102
+ )
103
+ elif f0_method == "harvest":
104
+ input_audio_path2wav[input_audio_path] = x.astype(np.double)
105
+ f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
106
+ if filter_radius > 2:
107
+ f0 = signal.medfilt(f0, 3)
108
+ elif f0_method == "crepe":
109
+ logging.info("inside elif if_method == crepe")
110
+ model = "full"
111
+ # Pick a batch size that doesn't cause memory errors on your gpu
112
+ batch_size = 512
113
+ # Compute pitch using first gpu
114
+ audio = torch.tensor(np.copy(x))[None].float()
115
+ logging.info("before torchcrepe.predict")
116
+ f0, pd = torchcrepe.predict(
117
+ audio,
118
+ self.sr,
119
+ self.window,
120
+ f0_min,
121
+ f0_max,
122
+ model,
123
+ batch_size=batch_size,
124
+ device=self.device,
125
+ return_periodicity=True,
126
+ )
127
+ logging.info("after torchcrepe.predict")
128
+ pd = torchcrepe.filter.median(pd, 3)
129
+ f0 = torchcrepe.filter.mean(f0, 3)
130
+ f0[pd < 0.1] = 0
131
+ f0 = f0[0].cpu().numpy()
132
+ f0 *= pow(2, f0_up_key / 12)
133
+ # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
134
+ tf0 = self.sr // self.window # f0 points per second
135
+ if inp_f0 is not None:
136
+ delta_t = np.round(
137
+ (inp_f0[:, 0].max() - inp_f0[:, 0].min()) * tf0 + 1
138
+ ).astype("int16")
139
+ replace_f0 = np.interp(
140
+ list(range(delta_t)), inp_f0[:, 0] * 100, inp_f0[:, 1]
141
+ )
142
+ shape = f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)].shape[0]
143
+ f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)] = replace_f0[
144
+ :shape
145
+ ]
146
+ # with open("test_opt.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
147
+ f0bak = f0.copy()
148
+ f0_mel = 1127 * np.log(1 + f0 / 700)
149
+ f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (
150
+ f0_mel_max - f0_mel_min
151
+ ) + 1
152
+ f0_mel[f0_mel <= 1] = 1
153
+ f0_mel[f0_mel > 255] = 255
154
+ f0_coarse = np.rint(f0_mel).astype(int)  # np.int is removed in modern NumPy
155
+ return f0_coarse, f0bak # 1-0
156
+
157
+ def vc(
158
+ self,
159
+ model,
160
+ net_g,
161
+ sid,
162
+ audio0,
163
+ pitch,
164
+ pitchf,
165
+ times,
166
+ index,
167
+ big_npy,
168
+ index_rate,
169
+ version,
170
+ protect,
171
+ ): # ,file_index,file_big_npy
172
+ feats = torch.from_numpy(audio0)
173
+ if self.is_half:
174
+ feats = feats.half()
175
+ else:
176
+ feats = feats.float()
177
+ if feats.dim() == 2: # double channels
178
+ feats = feats.mean(-1)
179
+ assert feats.dim() == 1, feats.dim()
180
+ feats = feats.view(1, -1)
181
+ padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
182
+
183
+ inputs = {
184
+ "source": feats.to(self.device),
185
+ "padding_mask": padding_mask,
186
+ "output_layer": 9 if version == "v1" else 12,
187
+ }
188
+ t0 = ttime()
189
+ with torch.no_grad():
190
+ logits = model.extract_features(**inputs)
191
+ feats = model.final_proj(logits[0]) if version == "v1" else logits[0]
192
+ if protect < 0.5 and pitch != None and pitchf != None:
193
+ feats0 = feats.clone()
194
+ if (
195
+ index is not None
196
+ and big_npy is not None
197
+ and index_rate != 0
198
+ ):
199
+ npy = feats[0].cpu().numpy()
200
+ if self.is_half:
201
+ npy = npy.astype("float32")
202
+
203
+ # _, I = index.search(npy, 1)
204
+ # npy = big_npy[I.squeeze()]
205
+
206
+ score, ix = index.search(npy, k=8)
207
+ weight = np.square(1 / score)
208
+ weight /= weight.sum(axis=1, keepdims=True)
209
+ npy = np.sum(big_npy[ix] * np.expand_dims(weight, axis=2), axis=1)
210
+
211
+ if self.is_half:
212
+ npy = npy.astype("float16")
213
+ feats = (
214
+ torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate
215
+ + (1 - index_rate) * feats
216
+ )
217
+
218
+ feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
219
+ if protect < 0.5 and pitch != None and pitchf != None:
220
+ feats0 = F.interpolate(feats0.permute(0, 2, 1), scale_factor=2).permute(
221
+ 0, 2, 1
222
+ )
223
+ t1 = ttime()
224
+ p_len = audio0.shape[0] // self.window
225
+ if feats.shape[1] < p_len:
226
+ p_len = feats.shape[1]
227
+ if pitch != None and pitchf != None:
228
+ pitch = pitch[:, :p_len]
229
+ pitchf = pitchf[:, :p_len]
230
+
231
+ if protect < 0.5 and pitch != None and pitchf != None:
232
+ pitchff = pitchf.clone()
233
+ pitchff[pitchf > 0] = 1
234
+ pitchff[pitchf < 1] = protect
235
+ pitchff = pitchff.unsqueeze(-1)
236
+ feats = feats * pitchff + feats0 * (1 - pitchff)
237
+ feats = feats.to(feats0.dtype)
238
+ p_len = torch.tensor([p_len], device=self.device).long()
239
+ with torch.no_grad():
240
+ if pitch != None and pitchf != None:
241
+ audio1 = (
242
+ (net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0])
243
+ .data.cpu()
244
+ .float()
245
+ .numpy()
246
+ )
247
+ else:
248
+ audio1 = (
249
+ (net_g.infer(feats, p_len, sid)[0][0, 0]).data.cpu().float().numpy()
250
+ )
251
+ del feats, p_len, padding_mask
252
+ if torch.cuda.is_available():
253
+ torch.cuda.empty_cache()
254
+ t2 = ttime()
255
+ times[0] += t1 - t0
256
+ times[2] += t2 - t1
257
+ return audio1
258
+
259
+ def pipeline(
260
+ self,
261
+ model,
262
+ net_g,
263
+ sid,
264
+ audio,
265
+ input_audio_path,
266
+ times,
267
+ f0_up_key,
268
+ f0_method,
269
+ file_index,
270
+ # file_big_npy,
271
+ index_rate,
272
+ if_f0,
273
+ filter_radius,
274
+ tgt_sr,
275
+ resample_sr,
276
+ rms_mix_rate,
277
+ version,
278
+ protect,
279
+ f0_file=None,
280
+ ):
281
+ logging.info("pipeline starts")
282
+ if (
283
+ file_index != ""
284
+ # and file_big_npy != ""
285
+ # and os.path.exists(file_big_npy) == True
286
+ and os.path.exists(file_index) == True
287
+ and index_rate != 0
288
+ ):
289
+ try:
290
+ index = faiss.read_index(file_index)
291
+ # big_npy = np.load(file_big_npy)
292
+ big_npy = index.reconstruct_n(0, index.ntotal)
293
+ except:
294
+ traceback.print_exc()
295
+ index = big_npy = None
296
+ else:
297
+ index = big_npy = None
298
+ logging.info("index: ")
299
+ audio = signal.filtfilt(bh, ah, audio)
300
+ audio_pad = np.pad(audio, (self.window // 2, self.window // 2), mode="reflect")
301
+ opt_ts = []
302
+ if audio_pad.shape[0] > self.t_max:
303
+ audio_sum = np.zeros_like(audio)
304
+ for i in range(self.window):
305
+ audio_sum += audio_pad[i : i - self.window]
306
+ for t in range(self.t_center, audio.shape[0], self.t_center):
307
+ opt_ts.append(
308
+ t
309
+ - self.t_query
310
+ + np.where(
311
+ np.abs(audio_sum[t - self.t_query : t + self.t_query])
312
+ == np.abs(audio_sum[t - self.t_query : t + self.t_query]).min()
313
+ )[0][0]
314
+ )
315
+ logging.info("opt_ts: ")
316
+ s = 0
317
+ audio_opt = []
318
+ t = None
319
+ t1 = ttime()
320
+ audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode="reflect")
321
+ p_len = audio_pad.shape[0] // self.window
322
+ inp_f0 = None
323
+ if hasattr(f0_file, "name") == True:
324
+ try:
325
+ with open(f0_file.name, "r") as f:
326
+ lines = f.read().strip("\n").split("\n")
327
+ inp_f0 = []
328
+ for line in lines:
329
+ inp_f0.append([float(i) for i in line.split(",")])
330
+ inp_f0 = np.array(inp_f0, dtype="float32")
331
+ except:
332
+ traceback.print_exc()
333
+ sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
334
+ logging.info("sid: ")
335
+ pitch, pitchf = None, None
336
+ logging.info("if_f0: ")
337
+ if if_f0 == 1:
338
+ logging.info("inside if_f0: ")
339
+ pitch, pitchf = self.get_f0(
340
+ input_audio_path,
341
+ audio_pad,
342
+ p_len,
343
+ f0_up_key,
344
+ f0_method,
345
+ filter_radius,
346
+ inp_f0,
347
+ )
348
+ logging.info("after get_f0")
349
+ pitch = pitch[:p_len]
350
+ pitchf = pitchf[:p_len]
351
+ if self.device == "mps":
352
+ pitchf = pitchf.astype(np.float32)
353
+ logging.info("12345")
354
+ pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
355
+ pitchf = torch.tensor(pitchf, device=self.device).unsqueeze(0).float()
356
+ logging.info("6789")
357
+ logging.info("after if_f0: ")
358
+ t2 = ttime()
359
+ times[1] += t2 - t1
360
+ for t in opt_ts:
361
+ t = t // self.window * self.window
362
+ if if_f0 == 1:
363
+ audio_opt.append(
364
+ self.vc(
365
+ model,
366
+ net_g,
367
+ sid,
368
+ audio_pad[s : t + self.t_pad2 + self.window],
369
+ pitch[:, s // self.window : (t + self.t_pad2) // self.window],
370
+ pitchf[:, s // self.window : (t + self.t_pad2) // self.window],
371
+ times,
372
+ index,
373
+ big_npy,
374
+ index_rate,
375
+ version,
376
+ protect,
377
+ )[self.t_pad_tgt : -self.t_pad_tgt]
378
+ )
379
+ else:
380
+ audio_opt.append(
381
+ self.vc(
382
+ model,
383
+ net_g,
384
+ sid,
385
+ audio_pad[s : t + self.t_pad2 + self.window],
386
+ None,
387
+ None,
388
+ times,
389
+ index,
390
+ big_npy,
391
+ index_rate,
392
+ version,
393
+ protect,
394
+ )[self.t_pad_tgt : -self.t_pad_tgt]
395
+ )
396
+ s = t
397
+
398
+ if if_f0 == 1:
399
+ audio_opt.append(
400
+ self.vc(
401
+ model,
402
+ net_g,
403
+ sid,
404
+ audio_pad[t:],
405
+ pitch[:, t // self.window :] if t is not None else pitch,
406
+ pitchf[:, t // self.window :] if t is not None else pitchf,
407
+ times,
408
+ index,
409
+ big_npy,
410
+ index_rate,
411
+ version,
412
+ protect,
413
+ )[self.t_pad_tgt : -self.t_pad_tgt]
414
+ )
415
+ else:
416
+ audio_opt.append(
417
+ self.vc(
418
+ model,
419
+ net_g,
420
+ sid,
421
+ audio_pad[t:],
422
+ None,
423
+ None,
424
+ times,
425
+ index,
426
+ big_npy,
427
+ index_rate,
428
+ version,
429
+ protect,
430
+ )[self.t_pad_tgt : -self.t_pad_tgt]
431
+ )
432
+ audio_opt = np.concatenate(audio_opt)
433
+
434
+
435
+ if rms_mix_rate != 1:
436
+ audio_opt = change_rms(audio, 16000, audio_opt, tgt_sr, rms_mix_rate)
437
+ if resample_sr >= 16000 and tgt_sr != resample_sr:
438
+ audio_opt = librosa.resample(
439
+ audio_opt, orig_sr=tgt_sr, target_sr=resample_sr
440
+ )
441
+ audio_max = np.abs(audio_opt).max() / 0.99
442
+ max_int16 = 32768
443
+ if audio_max > 1:
444
+ max_int16 /= audio_max
445
+ audio_opt = (audio_opt * max_int16).astype(np.int16)
446
+ del pitch, pitchf, sid
447
+ if torch.cuda.is_available():
448
+ torch.cuda.empty_cache()
449
+ return audio_opt