Thatneos
/

rvc

Model card Files Files and versions Community

rvc

File size: 9,694 Bytes

dc81f37

import os
import shutil
import gc
import torch
from multiprocessing import cpu_count
from lib.modules import VC
from lib.split_audio import split_silence_nonsilent, adjust_audio_lengths, combine_silence_nonsilent
import logging
from datetime import datetime
import traceback

# Configure logging
# Root-logger setup performed at import time: records at DEBUG level and above
# are sent to both a per-run log file and a stream handler.
logging.basicConfig(
    level=logging.DEBUG,
    # Each record carries the timestamp, level, process id and the emitting
    # function name / line number ahead of the message.
    format='%(asctime)s - %(levelname)s - %(process)d - %(funcName)s:%(lineno)d - %(message)s',
    handlers=[
        # The file name embeds the timestamp taken when this module is imported.
        # The path is relative, so the file is created in the current working
        # directory of the process.
        logging.FileHandler(f'debug_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'),
        # No stream is passed, so the handler writes to its default (sys.stderr).
        logging.StreamHandler()
    ]
)

class Configs:
    """Resolve the inference device and the window sizes used with it.

    After construction the instance exposes:

    * ``device``   - the device string to use; replaced by ``"mps"`` or
      ``"cpu"`` when CUDA is not available.
    * ``is_half``  - whether half precision is requested; forced to ``False``
      whenever the resolved device is not a CUDA device.
    * ``n_cpu``    - number of CPU cores reported by ``cpu_count()``.
    * ``gpu_name`` / ``gpu_mem`` - name and total memory (rounded GiB) of the
      selected CUDA device, or ``None`` when no CUDA device is used.
    * ``x_pad``, ``x_query``, ``x_center``, ``x_max`` - window sizes chosen
      from ``is_half`` and ``gpu_mem``.
    """

    def __init__(self, device, is_half):
        """Store the requested settings and resolve the device configuration.

        Args:
            device: Requested device string, e.g. ``"cuda:0"``, ``"cuda"``
                or ``"cpu"``.
            is_half: Whether half precision is requested.

        Raises:
            Exception: Any error raised by ``device_config`` is logged and
                re-raised unchanged.
        """
        logging.debug(f"Initializing Configs with device={device}, is_half={is_half}")
        self.device = device
        self.is_half = is_half
        self.n_cpu = 0
        self.gpu_name = None
        self.gpu_mem = None
        try:
            self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
            logging.debug(f"Device configuration: pad={self.x_pad}, query={self.x_query}, "
                         f"center={self.x_center}, max={self.x_max}")
        except Exception as e:
            logging.error(f"Failed to configure device: {str(e)}")
            raise

    @staticmethod
    def _cuda_index(requested):
        """Return the CUDA ordinal named by *requested*, or None for a non-CUDA device.

        A bare ``"cuda"`` maps to the current CUDA device. A malformed CUDA
        string such as ``"cuda:abc"`` still raises ``ValueError``.
        """
        if requested == "cuda":
            return torch.cuda.current_device()
        try:
            return int(requested.split(":")[-1])
        except ValueError:
            if requested.startswith("cuda"):
                raise
            # e.g. "cpu" requested explicitly on a machine that has CUDA.
            return None

    def device_config(self) -> tuple:
        """Resolve the device and return ``(x_pad, x_query, x_center, x_max)``.

        Side effects: may update ``device``, ``is_half``, ``n_cpu``,
        ``gpu_name`` and ``gpu_mem`` on the instance.

        Raises:
            ValueError: If a CUDA device string carries a non-numeric index.
        """
        requested = str(self.device)
        i_device = None

        if torch.cuda.is_available():
            i_device = self._cuda_index(requested)
        else:
            # torch.backends.mps does not exist on older torch builds.
            mps_backend = getattr(torch.backends, "mps", None)
            if mps_backend is not None and mps_backend.is_available():
                logging.warning("No supported N-card found, falling back to MPS")
                self.device = "mps"
            else:
                logging.warning("No supported N-card found, falling back to CPU")
                self.device = "cpu"

        if i_device is not None:
            self.gpu_name = torch.cuda.get_device_name(i_device)
            # Populate gpu_mem so the low-memory window sizes below can apply.
            total_bytes = torch.cuda.get_device_properties(i_device).total_memory
            self.gpu_mem = int(total_bytes / 1024 / 1024 / 1024 + 0.4)
            logging.debug(f"GPU detected: {self.gpu_name}")
            logging.debug(f"GPU memory: {self.gpu_mem} GB")
        elif self.is_half:
            # NOTE(review): half precision is only kept for CUDA devices here;
            # fp16 inference off-GPU is assumed to be unsupported downstream.
            logging.debug(f"Half precision disabled for device {self.device}")
            self.is_half = False

        if self.n_cpu == 0:
            self.n_cpu = cpu_count()
            logging.debug(f"Detected {self.n_cpu} CPU cores")

        # Memory configuration settings
        if self.is_half:
            x_pad, x_query, x_center, x_max = 3, 10, 60, 65
        else:
            x_pad, x_query, x_center, x_max = 1, 6, 38, 41

        # Small GPUs get the tightest windows regardless of precision.
        if self.gpu_mem is not None and self.gpu_mem <= 4:
            x_pad, x_query, x_center, x_max = 1, 5, 30, 32

        return x_pad, x_query, x_center, x_max

def get_model(voice_model):
    """Locate the model weights and optional index file for *voice_model*.

    The directory ``<cwd>/models/<voice_model>`` is scanned for a ``.pth``
    file and an ``.index`` file. Extensions are matched case-insensitively.
    When several candidates exist, the first one in sorted name order is
    used, so the result does not depend on directory listing order.

    Args:
        voice_model: Name of the sub-directory under ``models``.

    Returns:
        A ``(model_path, index_path)`` tuple. ``index_path`` is ``''`` when
        the directory contains no ``.index`` file.

    Raises:
        FileNotFoundError: If the directory does not exist or holds no
            ``.pth`` file.
        OSError: Any other error raised while listing the directory.
    """
    model_dir = os.path.join(os.getcwd(), "models", voice_model)
    logging.debug(f"Searching for model files in directory: {model_dir}")

    try:
        # os.listdir() returns entries in arbitrary order; sort for a stable pick.
        entries = sorted(os.listdir(model_dir))
    except OSError as e:
        logging.error(f"Failed to retrieve model files: {str(e)}")
        raise

    model_filename, index_filename = None, None
    for entry in entries:
        ext = os.path.splitext(entry)[1].lower()
        if ext == '.pth' and model_filename is None:
            model_filename = entry
            logging.debug(f"Found model file: {entry}")
        elif ext == '.index' and index_filename is None:
            index_filename = entry
            logging.debug(f"Found index file: {entry}")

    if model_filename is None:
        logging.error(f"No model file exists in {model_dir}")
        raise FileNotFoundError(f"No model file exists in {model_dir}")

    model_path = os.path.join(model_dir, model_filename)
    index_path = os.path.join(model_dir, index_filename) if index_filename else ''
    return model_path, index_path

def _log_conversion_success(inference_info, headline):
    """Log the message and timing details reported for a successful conversion."""
    logging.info(headline)
    logging.debug(inference_info[1])
    logging.debug(
        "Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs"
        % tuple(inference_info[2])
    )


def _reserve_output_path(audio_path, model_name, f0_method, audio_format):
    """Return the first unused ``output/<stem><model><Method>_<n>.<ext>`` path."""
    output_dir = os.path.join(os.getcwd(), "output")
    # Make sure the target directory exists before a file is written into it.
    os.makedirs(output_dir, exist_ok=True)
    stem = os.path.splitext(os.path.basename(audio_path))[0]
    prefix = f"{stem}{model_name}{f0_method.capitalize()}"
    output_count = 1
    while True:
        candidate = os.path.join(output_dir, f"{prefix}_{output_count}.{audio_format}")
        if not os.path.exists(candidate):
            return candidate
        output_count += 1


def _discard_split_artifacts(inferred_files, temp_dir):
    """Best-effort removal of per-segment outputs and the split scratch directory."""
    for inferred_file in inferred_files:
        try:
            os.remove(inferred_file)
        except FileNotFoundError:
            pass  # already gone; nothing to clean up
        except OSError as e:
            logging.warning(f"Could not remove temporary file {inferred_file}: {e}")
    try:
        shutil.rmtree(temp_dir)
    except OSError as e:
        logging.warning(f"Could not remove temporary directory {temp_dir}: {e}")


def infer_audio(
    model_name,
    audio_path,
    f0_change=0,
    f0_method="rmvpe+",
    min_pitch="50",
    max_pitch="1100",
    crepe_hop_length=128,
    index_rate=0.75,
    filter_radius=3,
    rms_mix_rate=0.25,
    protect=0.33,
    split_infer=False,
    min_silence=500,
    silence_threshold=-50,
    seek_step=1,
    keep_silence=100,
    do_formant=False,
    quefrency=0,
    timbre=1,
    f0_autotune=False,
    audio_format="wav",
    resample_sr=0,
    hubert_model_path="assets/hubert/hubert_base.pt",
    rmvpe_model_path="assets/rmvpe/rmvpe.pt",
    fcpe_model_path="assets/fcpe/fcpe.pt"
):
    """Convert *audio_path* with the voice model *model_name*.

    The model files are looked up with ``get_model``, loaded through
    ``VC.get_vc`` and applied with ``VC.vc_single``. With ``split_infer``
    the input is first split by ``split_silence_nonsilent``; every
    non-silent segment is converted separately, the results are length
    adjusted with ``adjust_audio_lengths`` and re-assembled with
    ``combine_silence_nonsilent``.

    Args:
        model_name: Sub-directory of ``models`` holding the ``.pth`` /
            ``.index`` files.
        audio_path: Path of the audio file to convert.
        f0_change, f0_method, index_rate, filter_radius, resample_sr,
        rms_mix_rate, protect, audio_format, crepe_hop_length, do_formant,
        quefrency, timbre, min_pitch, max_pitch, f0_autotune,
        hubert_model_path: Forwarded unchanged to ``VC.vc_single``
            (``protect`` is also passed to ``VC.get_vc``; ``f0_method`` and
            ``audio_format`` also shape the split-mode output file name).
        split_infer: Convert non-silent segments individually when true.
        min_silence, silence_threshold, seek_step: Forwarded to
            ``split_silence_nonsilent``.
        keep_silence: Forwarded to ``split_silence_nonsilent`` and
            ``combine_silence_nonsilent``.
        rmvpe_model_path, fcpe_model_path: Exported through the
            ``rmvpe_model_path`` / ``fcpe_model_path`` environment variables.

    Returns:
        The output path reported by ``VC.vc_single`` (single mode) or by
        ``combine_silence_nonsilent`` (split mode).

    Raises:
        RuntimeError: If ``VC.vc_single`` reports anything other than
            ``"Success."``.
        Exception: Any other failure is logged with its traceback and
            re-raised.
    """
    logging.info("Starting inference with parameters:")
    logging.info(f"- Model: {model_name}")
    logging.info(f"- Audio path: {audio_path}")
    logging.info(f"- F0 change: {f0_change}, Method: {f0_method}")
    logging.info(f"- Split inference: {split_infer}")

    os.environ["rmvpe_model_path"] = rmvpe_model_path
    os.environ["fcpe_model_path"] = fcpe_model_path

    # Bound up front so the ``finally`` clause can always drop them.
    configs = vc = None
    try:
        configs = Configs('cuda:0', True)
        vc = VC(configs)
        pth_path, index_path = get_model(model_name)
        # Called for its side effect of loading the model; the result is unused.
        vc.get_vc(pth_path, protect, 0.5)

        # Arguments shared by every vc_single call, after (0, <input path>).
        conversion_args = (
            f0_change,
            f0_method,
            index_path,
            index_path,
            index_rate,
            filter_radius,
            resample_sr,
            rms_mix_rate,
            protect,
            audio_format,
            crepe_hop_length,
            do_formant,
            quefrency,
            timbre,
            min_pitch,
            max_pitch,
            f0_autotune,
            hubert_model_path,
        )

        if split_infer:
            logging.info("Split inference mode enabled")
            inferred_files = []
            temp_dir = os.path.join(os.getcwd(), "seperate", "temp")
            os.makedirs(temp_dir, exist_ok=True)

            try:
                silence_files, nonsilent_files = split_silence_nonsilent(
                    audio_path, min_silence, silence_threshold, seek_step, keep_silence
                )
                logging.debug(f"Silence segments: {len(silence_files)}")
                logging.debug(f"Nonsilent segments: {len(nonsilent_files)}")

                for i, nonsilent_file in enumerate(nonsilent_files):
                    logging.info(f"Processing segment {i+1}/{len(nonsilent_files)}")

                    start_time = datetime.now()
                    inference_info, _, segment_path = vc.vc_single(
                        0, nonsilent_file, *conversion_args
                    )
                    process_time = (datetime.now() - start_time).total_seconds()
                    logging.debug(f"Segment processing time: {process_time:.2f}s")

                    if inference_info[0] != "Success.":
                        logging.error(f"Error processing segment {i+1}: {inference_info[0]}")
                        raise RuntimeError(
                            f"Error processing segment {i+1}: {inference_info[0]}"
                        )
                    _log_conversion_success(inference_info, "Segment processed successfully")
                    inferred_files.append(segment_path)

                logging.info("Adjusting inferred audio lengths")
                adjusted_inferred_files = adjust_audio_lengths(nonsilent_files, inferred_files)

                logging.info("Combining silence and inferred audios")
                output_path = _reserve_output_path(
                    audio_path, model_name, f0_method, audio_format
                )
                output_path = combine_silence_nonsilent(
                    silence_files, adjusted_inferred_files, keep_silence, output_path
                )

            except Exception as e:
                logging.error(f"Split inference failed: {str(e)}")
                raise
            finally:
                # Cleanup temporary files on success and on failure alike.
                _discard_split_artifacts(inferred_files, temp_dir)

        else:
            logging.info("Single inference mode")
            start_time = datetime.now()
            inference_info, _, output_path = vc.vc_single(0, audio_path, *conversion_args)
            process_time = (datetime.now() - start_time).total_seconds()
            logging.debug(f"Total processing time: {process_time:.2f}s")

            if inference_info[0] != "Success.":
                logging.error(f"Inference failed: {inference_info[0]}")
                raise RuntimeError(inference_info[0])
            _log_conversion_success(inference_info, "Inference completed successfully")

        return output_path

    except Exception as e:
        logging.error(f"Inference failed: {str(e)}")
        logging.error(traceback.format_exc())
        raise
    finally:
        # Release the model objects whether or not inference succeeded.
        configs = vc = None
        gc.collect()