File size: 9,130 Bytes
1c75048
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import torch, pdb, os, traceback, sys, warnings, shutil

# Make repo-local modules importable from the current working directory.
now_dir = os.getcwd()
sys.path.append(now_dir)
# Recreate a clean TEMP directory inside the repo and point the TEMP env
# var at it so libraries that honor TEMP write scratch files there.
tmp = os.path.join(now_dir, "TEMP")
shutil.rmtree(tmp, ignore_errors=True)
os.makedirs(tmp, exist_ok=True)
os.environ["TEMP"] = tmp
warnings.filterwarnings("ignore")
torch.manual_seed(114514)  # fixed seed for reproducible inference
from infer_pack.models import SynthesizerTrnMs256NSF as SynthesizerTrn256
from scipy.io import wavfile
from fairseq import checkpoint_utils
import gradio as gr
import librosa
import logging
from vc_infer_pipeline import VC
from config import is_half, device  # fix: `is_half` was imported twice
import soundfile as sf
from infer_uvr5 import _audio_pre_

logging.getLogger("numba").setLevel(logging.WARNING)

# Load the HuBERT feature extractor once at startup and keep it resident.
models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
    ["hubert_base.pt"], suffix="",
)
hubert_model = models[0].to(device)
# Half precision only when the config requests it (typically CUDA-only).
if is_half:
    hubert_model = hubert_model.half()
else:
    hubert_model = hubert_model.float()
hubert_model.eval()


weight_root = "weights"
weight_uvr5_root = "uvr5_weights"
# Dropdown choices: model file names with their extensions stripped.
names = [name.replace(".pt", "") for name in os.listdir(weight_root)]
uvr5_names = [name.replace(".pth", "") for name in os.listdir(weight_uvr5_root)]

def get_vc(sid):
    """Load the voice model named *sid* from ``weight_root``.

    Returns a ``(dv, tgt_sr, net_g, vc)`` tuple: the speaker embedding,
    the target sample rate, the synthesizer network (moved to ``device``
    and cast per ``is_half``), and a VC pipeline bound to them.
    """
    checkpoint_path = "%s/%s.pt" % (weight_root, sid)
    cpt = torch.load(checkpoint_path, map_location="cpu")
    speaker_dv = cpt["dv"]
    # Last config entry is the output sample rate of the synthesizer.
    target_sr = cpt["config"][-1]
    synth = SynthesizerTrn256(*cpt["config"], is_half=is_half)
    synth.load_state_dict(cpt["weight"], strict=True)
    synth.eval().to(device)
    synth = synth.half() if is_half else synth.float()
    pipeline = VC(target_sr, device, is_half)
    return speaker_dv, target_sr, synth, pipeline

def vc_single(sid, input_audio, f0_up_key, f0_file):
    """Convert one audio clip with the voice model *sid*.

    sid         -- model name (str) to load via get_vc, or a pre-loaded
                   (dv, tgt_sr, net_g, vc) sequence (as passed by vc_multi).
    input_audio -- file path (str), or a (sampling_rate, int16 ndarray)
                   pair as produced by the Gradio audio widget.
    f0_up_key   -- pitch shift in semitones (coerced to int).
    f0_file     -- optional F0 curve file forwarded to the pipeline.

    Returns ("Success", (tgt_sr, audio)) on success, or
    (traceback_text, (None, None)) on failure.
    """
    if input_audio is None:
        return "You need to upload an audio", None
    f0_up_key = int(f0_up_key)
    # Pre-bind so the `finally` cleanup cannot hit an unbound name when an
    # exception fires before the model is loaded (the original `del` raised
    # UnboundLocalError there and clobbered the error return value).
    dv = net_g = vc = None
    try:
        if isinstance(input_audio, str):
            # A path on disk: soundfile returns float samples directly.
            print("processing %s" % input_audio)
            audio, sampling_rate = sf.read(input_audio)
        else:
            # Gradio widget output: (sr, int16 array) — rescale to [-1, 1).
            sampling_rate, audio = input_audio
            audio = audio.astype("float32") / 32768
        if isinstance(sid, str):
            dv, tgt_sr, net_g, vc = get_vc(sid)
        else:
            # Caller supplied an already-loaded model bundle.
            dv, tgt_sr, net_g, vc = sid
        if len(audio.shape) > 1:
            audio = librosa.to_mono(audio.transpose(1, 0))
        if sampling_rate != 16000:
            # HuBERT expects 16 kHz input.
            audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
        times = [0, 0, 0]  # per-stage timing filled in by the pipeline
        audio_opt = vc.pipeline(hubert_model, net_g, dv, audio, times, f0_up_key, f0_file=f0_file)
        print(times)
        return "Success", (tgt_sr, audio_opt)
    except Exception:
        info = traceback.format_exc()
        print(info)
        return info, (None, None)
    finally:
        # Drop local model references and release cached GPU memory.
        print("clean_empty_cache")
        del net_g, dv, vc
        torch.cuda.empty_cache()

def vc_multi(sid, dir_path, opt_root, paths, f0_up_key):
    """Batch-convert audio files with the voice model *sid*.

    dir_path  -- input folder; when non-empty it takes precedence over
                 the uploaded file list *paths*.
    opt_root  -- output folder (created if missing).
    paths     -- uploaded file objects (each with a ``.name`` path).
    f0_up_key -- pitch shift in semitones, forwarded to vc_single.

    Returns a newline-joined per-file status report, or a traceback
    string if setup fails.
    """
    # Pre-bind so the `finally` cleanup cannot raise UnboundLocalError when
    # get_vc fails (which would have replaced the returned traceback).
    dv = net_g = vc = None
    try:
        # Strip stray spaces users copy along with the path.
        dir_path = dir_path.strip(" ")
        opt_root = opt_root.strip(" ")
        os.makedirs(opt_root, exist_ok=True)
        dv, tgt_sr, net_g, vc = get_vc(sid)
        try:
            if dir_path != "":
                # A directory path takes precedence over uploaded files.
                paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)]
            else:
                paths = [path.name for path in paths]
        except Exception:
            # Unreadable folder: fall back to the uploaded file list.
            traceback.print_exc()
            paths = [path.name for path in paths]
        infos = []
        for path in paths:
            # Reuse the already-loaded model bundle for every file.
            info, opt = vc_single([dv, tgt_sr, net_g, vc], path, f0_up_key, f0_file=None)
            if info == "Success":
                try:
                    tgt_sr, audio_opt = opt
                    wavfile.write("%s/%s" % (opt_root, os.path.basename(path)), tgt_sr, audio_opt)
                except Exception:
                    info = traceback.format_exc()
            infos.append("%s->%s" % (os.path.basename(path), info))
        return "\n".join(infos)
    except Exception:
        return traceback.format_exc()
    finally:
        # Drop local model references and release cached GPU memory.
        print("clean_empty_cache")
        del net_g, dv, vc
        torch.cuda.empty_cache()

def uvr(model_name, inp_root, save_root_vocal, save_root_ins):
    """Run UVR5 vocal/instrumental separation over every file in *inp_root*.

    model_name      -- UVR5 weight name (extension-less, under
                       ``weight_uvr5_root``).
    inp_root        -- folder of input audio files.
    save_root_vocal -- output folder for the vocal stems.
    save_root_ins   -- output folder for the instrumental stems.

    Returns a newline-joined per-file status report; failures carry the
    full traceback text.
    """
    report = []
    try:
        # Trim stray spaces users copy along with the paths.
        inp_root = inp_root.strip(" ")
        save_root_vocal = save_root_vocal.strip(" ")
        save_root_ins = save_root_ins.strip(" ")
        separator = _audio_pre_(
            model_path=os.path.join(weight_uvr5_root, model_name + ".pth"),
            device=device,
            is_half=is_half,
        )
        for fname in os.listdir(inp_root):
            audio_path = os.path.join(inp_root, fname)
            try:
                separator._path_audio_(audio_path, save_root_ins, save_root_vocal)
                report.append("%s->Success" % (os.path.basename(audio_path)))
            except:
                # Per-file failure: record it and keep going.
                report.append("%s->%s" % (os.path.basename(audio_path), traceback.format_exc()))
    except:
        report.append(traceback.format_exc())
    finally:
        try:
            # Drop the model so its memory can actually be released.
            del separator.model
            del separator
        except:
            traceback.print_exc()
        print("clean_empty_cache")
        torch.cuda.empty_cache()
    return "\n".join(report)

# --- Gradio UI -------------------------------------------------------------
# Tab 1 "推理" (inference): single-clip conversion and batch conversion.
# Tab 2 "数据处理" (data processing): UVR5 vocal/instrumental separation.
# Tab 3 "训练-待开放" (training): placeholder, not yet implemented.
# All user-facing labels/markdown are runtime strings and kept verbatim.
with gr.Blocks() as app:
    with gr.Tabs():
        with gr.TabItem("推理"):
            with gr.Group():
                # Disclaimer and pitch-shift usage notes shown to the user.
                gr.Markdown(value="""
                    使用软件者、传播软件导出的声音者自负全责。如不认可该条款,则不能使用/引用软件包内所有代码和文件。<br>
                    目前仅开放白菜音色,后续将扩展为本地训练推理工具,用户可训练自己的音色进行社区共享。<br>
                    男转女推荐+12key,女转男推荐-12key,如果音域爆炸导致音色失真也可以自己调整到合适音域
                    """)
                with gr.Row():
                    with gr.Column():
                        # Voice choices come from the weights/ directory scan.
                        sid0 = gr.Dropdown(label="音色", choices=names)
                        # Pitch shift in semitones (+12 = up one octave).
                        vc_transform0 = gr.Number(label="变调(整数,半音数量,升八度12降八度-12)", value=12)
                        f0_file = gr.File(label="F0曲线文件,可选,一行一个音高,代替默认F0及升降调")
                    input_audio0 = gr.Audio(label="上传音频")
                    but0=gr.Button("转换", variant="primary")
                    with gr.Column():
                        vc_output1 = gr.Textbox(label="输出信息")
                        vc_output2 = gr.Audio(label="输出音频")
                    # Single-clip conversion -> (status text, converted audio).
                    but0.click(vc_single, [sid0, input_audio0, vc_transform0,f0_file], [vc_output1, vc_output2])
            with gr.Group():
                # Batch conversion: folder path takes precedence over uploads.
                gr.Markdown(value="""
                    批量转换,上传多个音频文件,在指定文件夹(默认opt)下输出转换的音频。<br>
                    合格的文件夹路径格式举例:E:\codes\py39\\vits_vc_gpu\白鹭霜华测试样例(去文件管理器地址栏拷就行了)
                    """)
                with gr.Row():
                    with gr.Column():
                        sid1 = gr.Dropdown(label="音色", choices=names)
                        vc_transform1 = gr.Number(label="变调(整数,半音数量,升八度12降八度-12)", value=12)
                        opt_input = gr.Textbox(label="指定输出文件夹",value="opt")
                    with gr.Column():
                        dir_input = gr.Textbox(label="输入待处理音频文件夹路径")
                        inputs = gr.File(file_count="multiple", label="也可批量输入音频文件,二选一,优先读文件夹")
                    but1=gr.Button("转换", variant="primary")
                    vc_output3 = gr.Textbox(label="输出信息")
                    # Batch conversion -> per-file status report.
                    but1.click(vc_multi, [sid1, dir_input,opt_input,inputs, vc_transform1], [vc_output3])

        with gr.TabItem("数据处理"):
            with gr.Group():
                # UVR5 separation notes (HP2 vs HP5 model choice guidance).
                gr.Markdown(value="""
                    人声伴奏分离批量处理,使用UVR5模型。<br>
                    不带和声用HP2,带和声且提取的人声不需要和声用HP5<br>
                    合格的文件夹路径格式举例:E:\codes\py39\\vits_vc_gpu\白鹭霜华测试样例(去文件管理器地址栏拷就行了)
                    """)
                with gr.Row():
                    with gr.Column():
                        dir_wav_input = gr.Textbox(label="输入待处理音频文件夹路径")
                        wav_inputs = gr.File(file_count="multiple", label="也可批量输入音频文件,二选一,优先读文件夹")
                    with gr.Column():
                        # Model choices come from the uvr5_weights/ directory scan.
                        model_choose = gr.Dropdown(label="模型", choices=uvr5_names)
                        opt_vocal_root = gr.Textbox(label="指定输出人声文件夹",value="opt")
                        opt_ins_root = gr.Textbox(label="指定输出乐器文件夹",value="opt")
                    but2=gr.Button("转换", variant="primary")
                    vc_output4 = gr.Textbox(label="输出信息")
                    # Separation -> per-file status report.
                    but2.click(uvr, [model_choose, dir_wav_input,opt_vocal_root,opt_ins_root], [vc_output4])
        with gr.TabItem("训练-待开放"):pass

    # Alternative: bind 0.0.0.0 to expose the UI on the LAN.
    # app.launch(server_name="0.0.0.0",server_port=7860)
    app.launch(server_name="127.0.0.1",server_port=7860)