# Snapshot 2b7fd6e (file size: 8,303 bytes)
from inference.infer_tool import Svc
from vextract.vocal_extract import VEX
import gradio as gr
import os
# os.environ['CUDA_VISIBLE_DEVICES'] = '1,2'
class VitsGradio:
    """Gradio front end for vocal extraction and singing-voice conversion.

    Constructing an instance builds the complete ``gr.Blocks`` UI and stores it
    in ``self.Vits``; queueing and launching are left to the caller.
    """

    def __init__(self):
        """Create the back ends, collect checkpoint names and build the UI."""
        self.so = Svc()
        self.v = VEX()
        # Speaker names of the currently loaded model; filled by loadModel().
        self.lspk = []
        # Every directory name found anywhere below ./checkpoints is offered
        # as a selectable model.
        self.modelPaths = []
        for _root, subdirs, _files in os.walk("checkpoints"):
            self.modelPaths.extend(subdirs)
        # A missing or empty checkpoints folder must not crash UI construction.
        default_model = self.modelPaths[0] if self.modelPaths else None

        # NOTE(review): the container nesting below was reconstructed from a
        # copy of this file whose indentation had been lost - confirm that the
        # rendered layout matches what is expected.
        with gr.Blocks(title="Sovits歌声合成工具") as self.Vits:
            gr.Markdown(
                "\n"
                "# 歌声合成工具\n"
                '- 请依次选择语音模型、设备以及运行模式,然后点击"载入模型"\n'
                "- 输入音频需要是干净的人声\n"
            )
            with gr.Tab("人声提取"):
                with gr.Row():
                    with gr.Column():
                        sample_audio = gr.Audio(label="输入音频")
                        extractAudioBtn = gr.Button("提取人声")
                with gr.Row():
                    with gr.Column():
                        self.sample_vocal_output = gr.Audio(label="输出音频")
                        self.sample_accompaniment_output = gr.Audio()
                extractAudioBtn.click(
                    self.v.separate,
                    inputs=[sample_audio],
                    outputs=[self.sample_vocal_output, self.sample_accompaniment_output],
                    show_progress=True,
                    api_name="extract",
                )
            with gr.Tab("歌声合成"):
                # Single-file panel; hidden until loadModel() reveals it.
                with gr.Row(visible=False) as self.VoiceConversion:
                    with gr.Column():
                        with gr.Row():
                            with gr.Column():
                                self.srcaudio = gr.Audio(label="输入音频")
                                self.btnVC = gr.Button("说话人转换")
                            with gr.Column():
                                with gr.Row():
                                    with gr.Column():
                                        self.dsid0 = gr.Dropdown(label="目标角色", choices=self.lspk)
                                        self.tran = gr.Slider(label="升降调", maximum=60, minimum=-60,
                                                              step=1, value=0)
                                        self.th = gr.Slider(label="切片阈值", maximum=32767,
                                                            minimum=-32768, step=0.1, value=-40)
                                        self.ns = gr.Slider(label="噪音级别", maximum=1.0, minimum=0.0,
                                                            step=0.1, value=0.4)
                        with gr.Row():
                            self.VCOutputs = gr.Audio()
                self.btnVC.click(
                    self.so.inference,
                    inputs=[self.srcaudio, self.dsid0, self.tran, self.th, self.ns],
                    outputs=[self.VCOutputs],
                    show_progress=True,
                    api_name="run",
                )
                # Batch panel; hidden until loadModel() reveals it. It re-uses
                # the attribute names of the single-file panel: the click
                # handler above was already given the earlier component
                # objects, so rebinding the names here does not affect it.
                with gr.Row(visible=False) as self.VoiceBatchConversion:
                    with gr.Column():
                        with gr.Row():
                            with gr.Column():
                                self.srcaudio = gr.Files(label="上传多个音频文件", file_types=['.wav'],
                                                         interactive=True)
                                self.btnVC = gr.Button("说话人转换")
                            with gr.Column():
                                with gr.Row():
                                    with gr.Column():
                                        self.dsid1 = gr.Dropdown(label="目标角色", choices=self.lspk)
                                        self.tran = gr.Slider(label="升降调", maximum=60, minimum=-60,
                                                              step=1, value=0)
                                        self.th = gr.Slider(label="切片阈值", maximum=32767,
                                                            minimum=-32768, step=0.1, value=-40)
                                        self.ns = gr.Slider(label="噪音级别", maximum=1.0, minimum=0.0,
                                                            step=0.1, value=0.4)
                        with gr.Row():
                            self.VCOutputs = gr.File(label="Output Zip File", interactive=False)
                self.btnVC.click(
                    self.batch_inference,
                    inputs=[self.srcaudio, self.dsid1, self.tran, self.th, self.ns],
                    outputs=[self.VCOutputs],
                    show_progress=True,
                    api_name="batch",
                )
                # Model / device / mode selection, always visible.
                with gr.Row():
                    with gr.Column():
                        modelstrs = gr.Dropdown(label="模型", choices=self.modelPaths,
                                                value=default_model, type="value")
                        devicestrs = gr.Dropdown(label="设备", choices=["cpu", "cuda"], value="cuda",
                                                 type="value")
                        isbatchmod = gr.Radio(label="运行模式", choices=["single", "batch"],
                                              value="single",
                                              info="single: 单个文件处理. batch:批量处理支持上传多个文件")
                        btnMod = gr.Button("载入模型")
                        btnMod.click(
                            self.loadModel,
                            inputs=[modelstrs, devicestrs, isbatchmod],
                            outputs=[self.dsid0, self.dsid1, self.VoiceConversion,
                                     self.VoiceBatchConversion],
                            show_progress=True,
                            api_name="switch",
                        )

    def batch_inference(self, files, chara, tran, slice_db, ns, progress=gr.Progress()):
        """Convert every uploaded WAV file and bundle the results into a zip.

        Args:
            files: Uploaded file objects from the ``gr.Files`` input; each one
                is read through its ``.name`` path with ``scipy.io.wavfile``.
            chara: Target speaker, forwarded to ``self.so.inference``.
            tran: Pitch-shift value, forwarded to ``self.so.inference``.
            slice_db: Slicing threshold, forwarded to ``self.so.inference``.
            ns: Noise level, forwarded to ``self.so.inference``.
            progress: Gradio progress tracker used to report each stage.

        Returns:
            Path of the zip archive written to ``temp/batch_<uuid>/output.zip``.

        Raises:
            gr.Error: If no file was uploaded.
        """
        from zipfile import ZipFile
        from scipy.io import wavfile
        import uuid

        if not files:
            # Message: "Please upload at least one audio file first."
            raise gr.Error("请先上传至少一个音频文件")

        temp_directory = "temp"
        progress(0.00, desc="初始化文件夹")
        tmp_workdir_name = f"{temp_directory}/batch_{uuid.uuid4()}"
        # makedirs(exist_ok=True) creates both levels and cannot fail when a
        # concurrent request creates the shared "temp" folder first.
        os.makedirs(tmp_workdir_name, exist_ok=True)
        progress(0.10, desc="初始化文件夹")

        total = len(files)
        output_files = []
        used_names = set()
        for idx, file in enumerate(files):
            filename = os.path.basename(file.name)
            # Reported before the file is processed, so the fraction uses idx:
            # the conversion stage spans 0.10 .. 0.80 and never runs backwards.
            progress(0.10 + 0.70 * idx / total, desc=f"处理音频{(idx + 1)}/{total}:{filename}")
            print(f"{idx}, {file}, {filename}")
            sampling_rate, audio = wavfile.read(file.name)
            output_sampling_rate, output_audio = self.so.inference(
                (sampling_rate, audio), chara=chara, tran=tran, slice_db=slice_db, ns=ns)

            # Uploads sharing a basename would otherwise overwrite each other.
            stem, ext = os.path.splitext(filename)
            out_name = filename
            suffix = 1
            while out_name in used_names:
                out_name = f"{stem}_{suffix}{ext}"
                suffix += 1
            used_names.add(out_name)

            new_filepath = f"{tmp_workdir_name}/{out_name}"
            wavfile.write(filename=new_filepath, rate=output_sampling_rate, data=output_audio)
            output_files.append(new_filepath)
        progress(0.80, desc="音频处理完毕")

        zipfilename = f"{tmp_workdir_name}/output.zip"
        with ZipFile(zipfilename, "w") as zip_obj:
            for filepath in output_files:
                zip_obj.write(filepath, os.path.basename(filepath))
        progress(0.90, desc="压缩完毕")

        # The archive now holds every result, so the loose WAV files are only
        # wasting disk space. The zip itself must stay: it is the return value.
        for filepath in output_files:
            try:
                os.remove(filepath)
            except OSError as err:
                print(f"could not remove {filepath}: {err}")
        progress(1.00, desc="清理空间")
        return zipfilename

    def loadModel(self, path, device, process_mode):
        """Load a checkpoint and reveal the panel for the chosen mode.

        Args:
            path: Checkpoint name passed to ``self.so.load_checkpoint``.
            device: Device string passed to ``self.so.set_device``.
            process_mode: ``"single"`` shows the single-file panel; any other
                value shows the batch panel.

        Returns:
            A list of four Gradio updates, in the order: single-file speaker
            dropdown, batch speaker dropdown, single-file panel visibility,
            batch panel visibility.
        """
        print(f"path: {path}, device: {device}")
        self.so.set_device(device)
        print("device set.")
        self.so.load_checkpoint(path)
        print("checkpoint loaded")
        # Only .items() is relied upon here, exactly as before, because the
        # speaker table's type is not visible from this file.
        self.lspk = [spk for spk, _sid in self.so.hps_ms.spk.items()]
        print(f"LSPK: {self.lspk}")

        single_mode = process_mode == "single"
        VChange = gr.update(visible=single_mode)
        VBChange = gr.update(visible=not single_mode)
        # A checkpoint without speakers leaves the dropdowns unselected
        # instead of raising IndexError.
        default_speaker = self.lspk[0] if self.lspk else None
        SD0Change = gr.update(choices=self.lspk, value=default_speaker)
        SD1Change = gr.update(choices=self.lspk, value=default_speaker)
        print("allset update display")
        return [SD0Change, SD1Change, VChange, VBChange]
if __name__ == "__main__":
    # Build the UI once, enable Gradio's request queue, then start serving.
    grVits = VitsGradio()
    queued_ui = grVits.Vits.queue(concurrency_count=20, status_update_rate=5.0)
    queued_ui.launch(server_port=7870, share=True, show_api=True)
|