# Sovits singing-synthesis Gradio front-end.
from inference.infer_tool import Svc
from vextract.vocal_extract import VEX
import gradio as gr
import os
# os.environ['CUDA_VISIBLE_DEVICES'] = '1,2'
class VitsGradio:
    """Gradio front-end for the Sovits singing-synthesis tool.

    Builds a Blocks UI with a vocal-extraction tab and a singing-synthesis
    tab (single-file and batch modes), plus model/device selection.  The
    assembled interface is exposed as ``self.Vits``; callers launch it via
    ``self.Vits.queue(...).launch(...)``.
    """

    def __init__(self):
        """Construct backends, discover models, and build the Blocks UI."""
        self.so = Svc()   # inference backend; configured later by loadModel()
        self.v = VEX()    # vocal / accompaniment separator
        self.lspk = []    # speaker names, populated by loadModel()
        self.modelPaths = []
        # Every sub-directory of ./checkpoints is treated as a selectable model.
        for root, dirs, files in os.walk("checkpoints"):
            for model_dir in dirs:
                self.modelPaths.append(model_dir)
        with gr.Blocks(title="Sovits Singing Synthesis Tool") as self.Vits:
            gr.Markdown(
                """
                # Singing Synthesis Tool
                - Please select the voice model, device, and operating mode in sequence, then click "Load Model"
                - The input audio needs to be clean vocals
                """
            )
            with gr.Tab("Vocal Extraction"):
                with gr.Row():
                    with gr.Column():
                        sample_audio = gr.Audio(label="Input Audio")
                        extractAudioBtn = gr.Button("Extract Vocals")
                with gr.Row():
                    with gr.Column():
                        self.sample_vocal_output = gr.Audio(label="Output Audio")
                        self.sample_accompaniment_output = gr.Audio()
                extractAudioBtn.click(
                    self.v.separate,
                    inputs=[sample_audio],
                    outputs=[self.sample_vocal_output, self.sample_accompaniment_output],
                    show_progress=True,
                    api_name="extract",
                )
            with gr.Tab("Singing Synthesis"):
                # Hidden until a model is loaded; loadModel() toggles visibility
                # between this row (single mode) and the batch row below.
                with gr.Row(visible=False) as self.VoiceConversion:
                    with gr.Column():
                        with gr.Row():
                            with gr.Column():
                                self.srcaudio = gr.Audio(label="Input Audio")
                                self.btnVC = gr.Button("Speaker Conversion")
                        with gr.Column():
                            with gr.Row():
                                with gr.Column():
                                    self.dsid0 = gr.Dropdown(label="Target Character", choices=self.lspk)
                                    self.tran = gr.Slider(label="Pitch Shift", maximum=60, minimum=-60, step=1, value=0)
                                    self.th = gr.Slider(label="Slice Threshold", maximum=32767, minimum=-32768, step=0.1,
                                                        value=-40)
                                    self.ns = gr.Slider(label="Noise Level", maximum=1.0, minimum=0.0, step=0.1,
                                                        value=0.4)
                        with gr.Row():
                            self.VCOutputs = gr.Audio()
                    self.btnVC.click(
                        self.so.inference,
                        inputs=[self.srcaudio, self.dsid0, self.tran, self.th, self.ns],
                        outputs=[self.VCOutputs],
                        show_progress=True,
                        api_name="run",
                    )
                with gr.Row(visible=False) as self.VoiceBatchConversion:
                    with gr.Column():
                        with gr.Row():
                            with gr.Column():
                                self.srcaudio = gr.Files(label="Upload Multiple Audio Files", file_types=['.wav'],
                                                         interactive=True)
                                self.btnVC = gr.Button("Speaker Conversion")
                        with gr.Column():
                            with gr.Row():
                                with gr.Column():
                                    self.dsid1 = gr.Dropdown(label="Target Character", choices=self.lspk)
                                    self.tran = gr.Slider(label="Pitch Shift", maximum=60, minimum=-60, step=1, value=0)
                                    self.th = gr.Slider(label="Slice Threshold", maximum=32767, minimum=-32768, step=0.1,
                                                        value=-40)
                                    self.ns = gr.Slider(label="Noise Level", maximum=1.0, minimum=0.0, step=0.1,
                                                        value=0.4)
                        with gr.Row():
                            self.VCOutputs = gr.File(label="Output Zip File", interactive=False)
                    self.btnVC.click(
                        self.batch_inference,
                        inputs=[self.srcaudio, self.dsid1, self.tran, self.th, self.ns],
                        outputs=[self.VCOutputs],
                        show_progress=True,
                        api_name="batch",
                    )
                with gr.Row():
                    with gr.Column():
                        # Guard against an empty checkpoints/ directory: the
                        # original indexed modelPaths[0] unconditionally and
                        # crashed at startup when no model was present.
                        modelstrs = gr.Dropdown(label="Model", choices=self.modelPaths,
                                                value=self.modelPaths[0] if self.modelPaths else None,
                                                type="value")
                        devicestrs = gr.Dropdown(label="Device", choices=["cpu", "cuda"], value="cuda", type="value")
                        isbatchmod = gr.Radio(label="Operating Mode", choices=["single", "batch"], value="single",
                                              info="single: Single file processing. batch: Batch processing supports uploading multiple files")
                        btnMod = gr.Button("Load Model")
                        btnMod.click(self.loadModel, inputs=[modelstrs, devicestrs, isbatchmod],
                                     outputs=[self.dsid0, self.dsid1, self.VoiceConversion, self.VoiceBatchConversion],
                                     show_progress=True, api_name="switch")

    def batch_inference(self, files, chara, tran, slice_db, ns, progress=gr.Progress()):
        """Convert several uploaded wav files and return them as one zip.

        Args:
            files: uploaded file objects; each exposes a ``.name`` path.
            chara: target speaker name.
            tran: pitch shift passed through to ``Svc.inference``.
            slice_db: slicing threshold passed through to ``Svc.inference``.
            ns: noise level passed through to ``Svc.inference``.
            progress: Gradio progress tracker (injected by the UI).

        Returns:
            Path to a zip archive containing one converted wav per input.
        """
        from zipfile import ZipFile
        from scipy.io import wavfile
        import uuid
        temp_directory = "temp"
        # makedirs(exist_ok=True) avoids the check-then-create race of the
        # previous os.path.exists()/os.mkdir() pair.
        os.makedirs(temp_directory, exist_ok=True)
        progress(0.00, desc="Initializing Directory")
        # A unique work directory keeps concurrent batch jobs from colliding.
        tmp_workdir_name = f"{temp_directory}/batch_{uuid.uuid4()}"
        os.makedirs(tmp_workdir_name, exist_ok=True)
        progress(0.10, desc="Initializing Directory")
        output_files = []
        for idx, file in enumerate(files):
            filename = os.path.basename(file.name)
            progress(0.10 + (0.70 / float(len(files))) * (idx + 1.00),
                     desc=f"Processing Audio {(idx + 1)}/{len(files)}: {filename}")
            print(f"{idx}, {file}, {filename}")
            sampling_rate, audio = wavfile.read(file.name)
            output_sampling_rate, output_audio = self.so.inference((sampling_rate, audio), chara=chara, tran=tran,
                                                                   slice_db=slice_db, ns=ns)
            # Bug fix: the output path previously omitted the source file name,
            # so every converted file overwrote the previous one.
            new_filepath = f"{tmp_workdir_name}/{filename}"
            wavfile.write(filename=new_filepath, rate=output_sampling_rate, data=output_audio)
            output_files.append(new_filepath)
        # The loop above ends at 0.80; don't let progress run backwards.
        progress(0.80, desc="Audio Processing Complete")
        zipfilename = f"{tmp_workdir_name}/output.zip"
        with ZipFile(zipfilename, "w") as zip_obj:
            for filepath in output_files:
                zip_obj.write(filepath, os.path.basename(filepath))
        progress(0.90, desc="Compression Complete")
        # TODO: remove the temporary work directory once the download is served.
        progress(1.00, desc="Cleaning Up")
        return zipfilename

    def loadModel(self, path, device, process_mode):
        """Load a checkpoint, select the device, and toggle the UI mode.

        Args:
            path: model directory name (as listed in the Model dropdown).
            device: "cpu" or "cuda".
            process_mode: "single" or "batch"; controls which conversion
                row becomes visible.

        Returns:
            Gradio updates for [dsid0, dsid1, VoiceConversion row,
            VoiceBatchConversion row].
        """
        self.lspk = []
        print(f"path: {path}, device: {device}")
        self.so.set_device(device)
        print("device set.")
        self.so.load_checkpoint(path)
        print("checkpoint loaded")
        # hps_ms.spk maps speaker name -> id; only the names feed the dropdowns.
        for spk in self.so.hps_ms.spk:
            self.lspk.append(spk)
        print(f"LSPK: {self.lspk}")
        show_single = process_mode == "single"
        VChange = gr.update(visible=show_single)
        VBChange = gr.update(visible=not show_single)
        SD0Change = gr.update(choices=self.lspk, value=self.lspk[0])
        SD1Change = gr.update(choices=self.lspk, value=self.lspk[0])
        print("All set. Updating display")
        return [SD0Change, SD1Change, VChange, VBChange]
if __name__ == "__main__":
    # Build the UI, enable request queueing, and serve it.
    app = VitsGradio()
    demo = app.Vits
    demo.queue(concurrency_count=20, status_update_rate=5.0)
    demo.launch(server_port=7870, share=True, show_api=False)