# rvc / lib / infer.py
# Page header captured together with this file (not part of the program):
#   "Thatguy099's picture" - commit "Update lib/infer.py" (dc81f37, verified)
import os
import shutil
import gc
import torch
from multiprocessing import cpu_count
from lib.modules import VC
from lib.split_audio import split_silence_nonsilent, adjust_audio_lengths, combine_silence_nonsilent
import logging
from datetime import datetime
import traceback
# Configure logging: every record from DEBUG upwards goes both to a per-run log
# file (named after the start time, created relative to the current working
# directory) and to a stream handler for console output.
logging.basicConfig(
    handlers=[
        logging.FileHandler(f'debug_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(levelname)s - %(process)d - %(funcName)s:%(lineno)d - %(message)s',
    level=logging.DEBUG,
)
class Configs:
    """Device and precision settings plus the padding/window sizes derived from them.

    The four ``x_*`` values are computed once in ``__init__`` by
    ``device_config`` and exposed as attributes for the inference code.
    """

    def __init__(self, device, is_half):
        """Store the requested device/precision and derive the ``x_*`` settings.

        Args:
            device: Torch-style device string such as ``"cuda:0"``. It is
                replaced by ``"mps"`` or ``"cpu"`` when CUDA is not available.
            is_half: Selects the half-precision parameter set when true.

        Raises:
            Exception: Whatever ``device_config`` raises is logged and re-raised.
        """
        logging.debug(f"Initializing Configs with device={device}, is_half={is_half}")
        self.device = device
        self.is_half = is_half
        self.n_cpu = 0  # 0 means "not detected yet"; filled in by device_config()
        self.gpu_name = None
        self.gpu_mem = None  # total GPU memory in GiB; stays None without CUDA
        try:
            self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
            logging.debug(f"Device configuration: pad={self.x_pad}, query={self.x_query}, "
                          f"center={self.x_center}, max={self.x_max}")
        except Exception as e:
            logging.error(f"Failed to configure device: {str(e)}")
            raise

    def device_config(self) -> tuple:
        """Probe the hardware and return ``(x_pad, x_query, x_center, x_max)``.

        Side effects: may set ``gpu_name`` and ``gpu_mem`` (CUDA), may replace
        ``device`` with ``"mps"`` or ``"cpu"`` (no CUDA), and fills ``n_cpu``
        when it is still 0.

        Raises:
            ValueError: If CUDA is available and the text after the last ``:``
                in ``device`` is not an integer.
        """
        if torch.cuda.is_available():
            # Accept both "cuda:N" and a bare "cuda" (treated as index 0);
            # previously a string without ":N" crashed in int().
            _, separator, index_text = self.device.rpartition(":")
            i_device = int(index_text) if separator else 0
            self.gpu_name = torch.cuda.get_device_name(i_device)
            logging.debug(f"GPU detected: {self.gpu_name}")
            # Total memory in GiB (fractions from .6 upwards round up). Without
            # this assignment the low-memory branch below could never run.
            total_bytes = torch.cuda.get_device_properties(i_device).total_memory
            self.gpu_mem = int(total_bytes / 1024 / 1024 / 1024 + 0.4)
            logging.debug(f"GPU memory: {self.gpu_mem} GB")
        else:
            # torch.backends.mps does not exist on every torch build.
            mps_backend = getattr(torch.backends, "mps", None)
            if mps_backend is not None and mps_backend.is_available():
                logging.warning("No supported N-card found, falling back to MPS")
                self.device = "mps"
            else:
                logging.warning("No supported N-card found, falling back to CPU")
                self.device = "cpu"

        if self.n_cpu == 0:
            self.n_cpu = cpu_count()
            logging.debug(f"Detected {self.n_cpu} CPU cores")

        # Memory configuration settings
        if self.is_half:
            x_pad, x_query, x_center, x_max = 3, 10, 60, 65
        else:
            x_pad, x_query, x_center, x_max = 1, 6, 38, 41

        # Cards with 4 GiB or less get the smallest parameter set,
        # regardless of the precision choice above.
        if self.gpu_mem is not None and self.gpu_mem <= 4:
            x_pad, x_query, x_center, x_max = 1, 5, 30, 32

        return x_pad, x_query, x_center, x_max
def get_model(voice_model):
    """Locate the weight (``.pth``) and optional ``.index`` file of a voice model.

    The files are looked up in ``<current working directory>/models/<voice_model>``.
    Extensions are matched case-insensitively. The directory listing is sorted
    first, so when several candidates exist the lexicographically last one is
    chosen on every run (``os.listdir`` itself returns entries in arbitrary order).

    Args:
        voice_model: Name of the model's sub-directory inside ``models``.

    Returns:
        tuple: ``(model_path, index_path)``. ``index_path`` is ``''`` when the
        directory holds no ``.index`` file.

    Raises:
        FileNotFoundError: If the directory does not exist or contains no
            ``.pth`` file.
        OSError: If the directory cannot be listed for another reason.
    """
    model_dir = os.path.join(os.getcwd(), "models", voice_model)
    logging.debug(f"Searching for model files in directory: {model_dir}")

    # Only the listing can fail with an OS error; keeping the try this narrow
    # avoids logging the "no model file" error below a second time.
    try:
        entries = sorted(os.listdir(model_dir))
    except OSError as e:
        logging.error(f"Failed to retrieve model files: {str(e)}")
        raise

    model_filename, index_filename = None, None
    for entry in entries:
        ext = os.path.splitext(entry)[1].lower()
        if ext == '.pth':
            model_filename = entry
            logging.debug(f"Found model file: {entry}")
        elif ext == '.index':
            index_filename = entry
            logging.debug(f"Found index file: {entry}")

    if model_filename is None:
        logging.error(f"No model file exists in {model_dir}")
        raise FileNotFoundError(f"No model file exists in {model_dir}")

    model_path = os.path.join(model_dir, model_filename)
    index_path = os.path.join(model_dir, index_filename) if index_filename else ''
    return model_path, index_path
def _timed_vc_single(vc, input_path, shared_args):
    """Run ``vc.vc_single`` on one file; return ``(inference_info, output_path, seconds)``."""
    start_time = datetime.now()
    inference_info, _audio_data, output_path = vc.vc_single(0, input_path, *shared_args)
    process_time = (datetime.now() - start_time).total_seconds()
    return inference_info, output_path, process_time


def _log_conversion_details(inference_info):
    """Log the detail message and the timing figures of a successful conversion."""
    logging.debug(inference_info[1])
    logging.debug("Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs" % tuple(inference_info[2]))


def _next_free_output_path(audio_path, model_name, f0_method, audio_format):
    """Return the first numbered path under ``<cwd>/output`` that does not exist yet."""
    stem = os.path.splitext(os.path.basename(audio_path))[0]
    output_count = 1
    while True:
        candidate = os.path.join(
            os.getcwd(),
            "output",
            f"{stem}{model_name}"
            f"{f0_method.capitalize()}_{output_count}.{audio_format}"
        )
        if not os.path.exists(candidate):
            return candidate
        output_count += 1


def _remove_split_artifacts(inferred_files, temp_dir):
    """Best-effort removal of the per-segment outputs and the split scratch directory."""
    for inferred_file in inferred_files:
        try:
            os.remove(inferred_file)
        except OSError as e:
            # A leftover file must not turn a finished conversion into a failure.
            logging.warning(f"Could not remove temporary file {inferred_file}: {str(e)}")
    shutil.rmtree(temp_dir, ignore_errors=True)


def infer_audio(
    model_name,
    audio_path,
    f0_change=0,
    f0_method="rmvpe+",
    min_pitch="50",
    max_pitch="1100",
    crepe_hop_length=128,
    index_rate=0.75,
    filter_radius=3,
    rms_mix_rate=0.25,
    protect=0.33,
    split_infer=False,
    min_silence=500,
    silence_threshold=-50,
    seek_step=1,
    keep_silence=100,
    do_formant=False,
    quefrency=0,
    timbre=1,
    f0_autotune=False,
    audio_format="wav",
    resample_sr=0,
    hubert_model_path="assets/hubert/hubert_base.pt",
    rmvpe_model_path="assets/rmvpe/rmvpe.pt",
    fcpe_model_path="assets/fcpe/fcpe.pt"
):
    """Convert ``audio_path`` with voice model ``model_name`` and return the output path.

    A ``Configs('cuda:0', True)`` / ``VC`` pair is created, the model found by
    ``get_model`` is loaded, and ``VC.vc_single`` is called either once on the
    whole file or, with ``split_infer=True``, once per non-silent segment
    before the converted segments are re-joined with the silent ones.

    Args:
        model_name: Model directory name handed to ``get_model``; also part of
            the split-mode output file name.
        audio_path: Input audio file.
        f0_change, f0_method, index_rate, filter_radius, resample_sr,
        rms_mix_rate, protect, audio_format, crepe_hop_length, do_formant,
        quefrency, timbre, min_pitch, max_pitch, f0_autotune,
        hubert_model_path: Forwarded unchanged to ``VC.vc_single``, which
            defines their meaning. ``protect`` is also passed to ``VC.get_vc``;
            ``f0_method`` and ``audio_format`` are also used in the split-mode
            output file name.
        split_infer: Convert segment by segment instead of the whole file.
        min_silence, silence_threshold, seek_step, keep_silence: Forwarded to
            ``split_silence_nonsilent`` (``keep_silence`` also to
            ``combine_silence_nonsilent``). Only used when ``split_infer`` is true.
        rmvpe_model_path, fcpe_model_path: Exported as the environment
            variables ``rmvpe_model_path`` and ``fcpe_model_path``.

    Returns:
        The output path reported by ``VC.vc_single`` (single mode) or by
        ``combine_silence_nonsilent`` (split mode).

    Raises:
        RuntimeError: If ``VC.vc_single`` reports a status other than
            ``"Success."``.
        Exception: Any other error is logged together with its traceback and
            re-raised unchanged.
    """
    logging.info(f"Starting inference with parameters:")
    logging.info(f"- Model: {model_name}")
    logging.info(f"- Audio path: {audio_path}")
    logging.info(f"- F0 change: {f0_change}, Method: {f0_method}")
    logging.info(f"- Split inference: {split_infer}")
    os.environ["rmvpe_model_path"] = rmvpe_model_path
    os.environ["fcpe_model_path"] = fcpe_model_path

    # Bound up front so the finally clause can release them on every path.
    configs = vc = None
    try:
        configs = Configs('cuda:0', True)
        vc = VC(configs)
        pth_path, index_path = get_model(model_name)
        vc.get_vc(pth_path, protect, 0.5)

        # Everything vc_single receives after the leading 0 and the input path;
        # identical for the single call and for every segment.
        shared_args = (
            f0_change,
            f0_method,
            index_path,
            index_path,
            index_rate,
            filter_radius,
            resample_sr,
            rms_mix_rate,
            protect,
            audio_format,
            crepe_hop_length,
            do_formant,
            quefrency,
            timbre,
            min_pitch,
            max_pitch,
            f0_autotune,
            hubert_model_path,
        )

        if not split_infer:
            logging.info("Single inference mode")
            inference_info, output_path, process_time = _timed_vc_single(vc, audio_path, shared_args)
            logging.debug(f"Total processing time: {process_time:.2f}s")
            if inference_info[0] != "Success.":
                logging.error(f"Inference failed: {inference_info[0]}")
                raise RuntimeError(inference_info[0])
            logging.info("Inference completed successfully")
            _log_conversion_details(inference_info)
            return output_path

        logging.info("Split inference mode enabled")
        inferred_files = []
        temp_dir = os.path.join(os.getcwd(), "seperate", "temp")
        os.makedirs(temp_dir, exist_ok=True)
        try:
            silence_files, nonsilent_files = split_silence_nonsilent(
                audio_path, min_silence, silence_threshold, seek_step, keep_silence
            )
            logging.debug(f"Silence segments: {len(silence_files)}")
            logging.debug(f"Nonsilent segments: {len(nonsilent_files)}")

            for segment_no, nonsilent_file in enumerate(nonsilent_files, start=1):
                logging.info(f"Processing segment {segment_no}/{len(nonsilent_files)}")
                inference_info, segment_output, process_time = _timed_vc_single(
                    vc, nonsilent_file, shared_args
                )
                logging.debug(f"Segment processing time: {process_time:.2f}s")
                if inference_info[0] != "Success.":
                    logging.error(f"Error processing segment {segment_no}: {inference_info[0]}")
                    raise RuntimeError(f"Error processing segment {segment_no}")
                logging.info("Segment processed successfully")
                _log_conversion_details(inference_info)
                inferred_files.append(segment_output)

            logging.info("Adjusting inferred audio lengths")
            adjusted_inferred_files = adjust_audio_lengths(nonsilent_files, inferred_files)

            logging.info("Combining silence and inferred audios")
            output_path = _next_free_output_path(audio_path, model_name, f0_method, audio_format)
            return combine_silence_nonsilent(
                silence_files, adjusted_inferred_files, keep_silence, output_path
            )
        except Exception as e:
            logging.error(f"Split inference failed: {str(e)}")
            raise
        finally:
            # Cleanup temporary files, also when a segment failed half-way.
            _remove_split_artifacts(inferred_files, temp_dir)
    except Exception as e:
        logging.error(f"Inference failed: {str(e)}")
        logging.error(traceback.format_exc())
        raise
    finally:
        # Drop the model references on success and on failure alike.
        configs = vc = None
        gc.collect()