diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -1,3154 +1,65 @@ -import os, sys -os.system("pip install pyworld") # ==0.3.3 +import gradio as gr +import sys +import os +import logging now_dir = os.getcwd() sys.path.append(now_dir) -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' -os.environ["OPENBLAS_NUM_THREADS"] = "1" -os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1" - -# Download models -shell_script = './tools/dlmodels.sh' -os.system(f'chmod +x {shell_script}') -os.system('apt install git-lfs') -os.system('git lfs install') -os.system('apt-get -y install aria2') -os.system('aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d . -o hubert_base.pt') -try: - return_code = os.system(shell_script) - if return_code == 0: - print("Shell script executed successfully.") - else: - print(f"Shell script failed with return code {return_code}") -except Exception as e: - print(f"An error occurred: {e}") - - -import logging -import shutil -import threading -import lib.globals.globals as rvc_globals -from LazyImport import lazyload -import mdx -from mdx_processing_script import get_model_list,id_to_ptm,prepare_mdx,run_mdx -math = lazyload('math') -import traceback -import warnings -tensorlowest = lazyload('tensorlowest') -from random import shuffle -from subprocess import Popen -from time import sleep -import json -import pathlib - -import fairseq -logging.getLogger("faiss").setLevel(logging.WARNING) -import faiss -gr = lazyload("gradio") -np = lazyload("numpy") -torch = lazyload('torch') -re = lazyload('regex') -SF = lazyload("soundfile") -SFWrite = SF.write -from dotenv import load_dotenv -from sklearn.cluster import MiniBatchKMeans -import datetime - - -from glob import glob1 -import signal -from signal import SIGTERM -import librosa - -from configs.config import Config -from i18n import I18nAuto -from infer.lib.train.process_ckpt import ( - change_info, - extract_small_model, - merge, - show_info, -) -#from infer.modules.uvr5.modules import uvr -from infer.modules.vc.modules import VC -from infer.modules.vc.utils import * -from infer.modules.vc.pipeline import Pipeline -import lib.globals.globals as rvc_globals -math = lazyload('math') -ffmpeg = lazyload('ffmpeg') -import nltk -nltk.download('punkt', quiet=True) -from nltk.tokenize import sent_tokenize -from bark import SAMPLE_RATE - -import easy_infer -import audioEffects -from infer.lib.csvutil import CSVutil - -from lib.infer_pack.models import ( - SynthesizerTrnMs256NSFsid, - SynthesizerTrnMs256NSFsid_nono, - SynthesizerTrnMs768NSFsid, - SynthesizerTrnMs768NSFsid_nono, -) -from lib.infer_pack.models_onnx import SynthesizerTrnMsNSFsidM -from infer_uvr5 import _audio_pre_, _audio_pre_new -from MDXNet import MDXNetDereverb -from infer.lib.audio import load_audio - - -from sklearn.cluster import MiniBatchKMeans - -import time -import csv - -from shlex import quote as SQuote - - - -RQuote = lambda val: SQuote(str(val)) +# Tabs +from tabs.inference.inference import inference_tab +from tabs.download.download import download_tab +from tabs.tts.tts import tts_tab -tmp = os.path.join(now_dir, "TEMP") -runtime_dir = os.path.join(now_dir, "runtime/Lib/site-packages") -directories = ['logs', 'audios', 'datasets', 'weights', 'audio-others' , 'audio-outputs'] +# Assets +import assets.themes.loadThemes as loadThemes +from assets.i18n.i18n import I18nAuto +import assets.installation_checker as installation_checker +from assets.discord_presence import RPCManager +from assets.flask.server import start_flask, load_config_flask +from core import run_prerequisites_script +from delete_models import start_infinite_loop -shutil.rmtree(tmp, ignore_errors=True) -shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True) -shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True) - -os.makedirs(tmp, exist_ok=True) -for folder in directories: - os.makedirs(os.path.join(now_dir, folder), exist_ok=True) - - -os.makedirs(tmp, exist_ok=True) -os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True) -os.makedirs(os.path.join(now_dir, "assets/weights"), exist_ok=True) -os.environ["TEMP"] = tmp -warnings.filterwarnings("ignore") -torch.manual_seed(114514) -logging.getLogger("numba").setLevel(logging.WARNING) - -logger = logging.getLogger(__name__) - - -if not os.path.isdir("csvdb/"): - os.makedirs("csvdb") - frmnt, stp = open("csvdb/formanting.csv", "w"), open("csvdb/stop.csv", "w") - frmnt.close() - stp.close() - -global DoFormant, Quefrency, Timbre - -try: - DoFormant, Quefrency, Timbre = CSVutil("csvdb/formanting.csv", "r", "formanting") - DoFormant = ( - lambda DoFormant: True - if DoFormant.lower() == "true" - else (False if DoFormant.lower() == "false" else DoFormant) - )(DoFormant) -except (ValueError, TypeError, IndexError): - DoFormant, Quefrency, Timbre = False, 1.0, 1.0 - CSVutil("csvdb/formanting.csv", "w+", "formanting", DoFormant, Quefrency, Timbre) - -load_dotenv() -config = Config() -vc = VC(config) - -if config.dml == True: - - def forward_dml(ctx, x, scale): - ctx.scale = scale - res = x.clone().detach() - return res - - fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml +run_prerequisites_script("False", "True", "True", "True") +start_infinite_loop() i18n = I18nAuto() -i18n.print() -# 判断是否有能用来训练和加速推理的N卡 -ngpu = torch.cuda.device_count() -gpu_infos = [] -mem = [] -if_gpu_ok = False - -isinterrupted = 0 - +installation_checker.check_installation() +logging.getLogger("uvicorn").disabled = True +logging.getLogger("fairseq").disabled = True -if torch.cuda.is_available() or ngpu != 0: - for i in range(ngpu): - gpu_name = torch.cuda.get_device_name(i) - if any( - value in gpu_name.upper() - for value in [ - "10", - "16", - "20", - "30", - "40", - "A2", - "A3", - "A4", - "P4", - "A50", - "500", - "A60", - "70", - "80", - "90", - "M4", - "T4", - "TITAN", - ] - ): - # A10#A100#V100#A40#P40#M40#K80#A4500 - if_gpu_ok = True # 至少有一张能用的N卡 - gpu_infos.append("%s\t%s" % (i, gpu_name)) - mem.append( - int( - torch.cuda.get_device_properties(i).total_memory - / 1024 - / 1024 - / 1024 - + 0.4 - ) - ) -if if_gpu_ok and len(gpu_infos) > 0: - gpu_info = "\n".join(gpu_infos) - default_batch_size = min(mem) // 2 +my_applio = loadThemes.load_json() +if my_applio: + pass else: - gpu_info = "Unfortunately, there is no compatible GPU available to support your training." - default_batch_size = 1 -gpus = "-".join([i[0] for i in gpu_infos]) - -class ToolButton(gr.Button, gr.components.FormComponent): - """Small button with single emoji as text, fits inside gradio forms""" - - def __init__(self, **kwargs): - super().__init__(variant="tool", **kwargs) - - def get_block_name(self): - return "button" - - -hubert_model = None -weight_root = os.getenv("weight_root") -weight_uvr5_root = os.getenv("weight_uvr5_root") -index_root = os.getenv("index_root") -datasets_root = "datasets" -fshift_root = "formantshiftcfg" -audio_root = "audios" -audio_others_root = "audio-others" - -sup_audioext = {'wav', 'mp3', 'flac', 'ogg', 'opus', - 'm4a', 'mp4', 'aac', 'alac', 'wma', - 'aiff', 'webm', 'ac3'} - -names = [os.path.join(root, file) - for root, _, files in os.walk(weight_root) - for file in files - if file.endswith((".pth", ".onnx"))] - -indexes_list = [os.path.join(root, name) - for root, _, files in os.walk(index_root, topdown=False) - for name in files - if name.endswith(".index") and "trained" not in name] - -audio_paths = [os.path.join(root, name) - for root, _, files in os.walk(audio_root, topdown=False) - for name in files - if name.endswith(tuple(sup_audioext))] - -audio_others_paths = [os.path.join(root, name) - for root, _, files in os.walk(audio_others_root, topdown=False) - for name in files - if name.endswith(tuple(sup_audioext))] - -uvr5_names = [name.replace(".pth", "") - for name in os.listdir(weight_uvr5_root) - if name.endswith(".pth") or "onnx" in name] - - -check_for_name = lambda: sorted(names)[0] if names else '' - -datasets=[] -for foldername in os.listdir(os.path.join(now_dir, datasets_root)): - if "." not in foldername: - datasets.append(os.path.join(easy_infer.find_folder_parent(".","pretrained"),"datasets",foldername)) - -def get_dataset(): - if len(datasets) > 0: - return sorted(datasets)[0] - else: - return '' - -def update_model_choices(select_value): - model_ids = get_model_list() - model_ids_list = list(model_ids) - if select_value == "VR": - return {"choices": uvr5_names, "__type__": "update"} - elif select_value == "MDX": - return {"choices": model_ids_list, "__type__": "update"} - -set_bark_voice = easy_infer.get_bark_voice() -set_edge_voice = easy_infer.get_edge_voice() - -def update_tts_methods_voice(select_value): - #["Edge-tts", "RVG-tts", "Bark-tts"] - if select_value == "Edge-tts": - return {"choices": set_edge_voice, "value": "", "__type__": "update"} - elif select_value == "Bark-tts": - return {"choices": set_bark_voice, "value": "", "__type__": "update"} - - -def update_dataset_list(name): - new_datasets = [] - for foldername in os.listdir(os.path.join(now_dir, datasets_root)): - if "." not in foldername: - new_datasets.append(os.path.join(easy_infer.find_folder_parent(".","pretrained"),"datasets",foldername)) - return gr.Dropdown.update(choices=new_datasets) - -def get_indexes(): - indexes_list = [ - os.path.join(dirpath, filename) - for dirpath, _, filenames in os.walk(index_root) - for filename in filenames - if filename.endswith(".index") and "trained" not in filename - ] - - return indexes_list if indexes_list else '' - -def get_fshift_presets(): - fshift_presets_list = [ - os.path.join(dirpath, filename) - for dirpath, _, filenames in os.walk(fshift_root) - for filename in filenames - if filename.endswith(".txt") - ] - - return fshift_presets_list if fshift_presets_list else '' - -import soundfile as sf - -def generate_output_path(output_folder, base_name, extension): - # Generar un nombre único para el archivo de salida - index = 1 - while True: - output_path = os.path.join(output_folder, f"{base_name}_{index}.{extension}") - if not os.path.exists(output_path): - return output_path - index += 1 - -def combine_and_save_audios(audio1_path, audio2_path, output_path, volume_factor_audio1, volume_factor_audio2): - audio1, sr1 = librosa.load(audio1_path, sr=None) - audio2, sr2 = librosa.load(audio2_path, sr=None) - - # Alinear las tasas de muestreo - if sr1 != sr2: - if sr1 > sr2: - audio2 = librosa.resample(audio2, orig_sr=sr2, target_sr=sr1) - else: - audio1 = librosa.resample(audio1, orig_sr=sr1, target_sr=sr2) - - # Ajustar los audios para que tengan la misma longitud - target_length = min(len(audio1), len(audio2)) - audio1 = librosa.util.fix_length(audio1, target_length) - audio2 = librosa.util.fix_length(audio2, target_length) - - # Ajustar el volumen de los audios multiplicando por el factor de ganancia - if volume_factor_audio1 != 1.0: - audio1 *= volume_factor_audio1 - if volume_factor_audio2 != 1.0: - audio2 *= volume_factor_audio2 - - # Combinar los audios - combined_audio = audio1 + audio2 - - sf.write(output_path, combined_audio, sr1) - -# Resto de tu código... - -# Define función de conversión llamada por el botón -def audio_combined(audio1_path, audio2_path, volume_factor_audio1=1.0, volume_factor_audio2=1.0, reverb_enabled=False, compressor_enabled=False, noise_gate_enabled=False): - output_folder = os.path.join(now_dir, "audio-outputs") - os.makedirs(output_folder, exist_ok=True) - - # Generar nombres únicos para los archivos de salida - base_name = "combined_audio" - extension = "wav" - output_path = generate_output_path(output_folder, base_name, extension) - print(reverb_enabled) - print(compressor_enabled) - print(noise_gate_enabled) - - if reverb_enabled or compressor_enabled or noise_gate_enabled: - # Procesa el primer audio con los efectos habilitados - base_name = "effect_audio" - output_path = generate_output_path(output_folder, base_name, extension) - processed_audio_path = audioEffects.process_audio(audio2_path, output_path, reverb_enabled, compressor_enabled, noise_gate_enabled) - base_name = "combined_audio" - output_path = generate_output_path(output_folder, base_name, extension) - # Combina el audio procesado con el segundo audio usando audio_combined - combine_and_save_audios(audio1_path, processed_audio_path, output_path, volume_factor_audio1, volume_factor_audio2) - - return i18n("Conversion complete!"), output_path - else: - base_name = "combined_audio" - output_path = generate_output_path(output_folder, base_name, extension) - # No hay efectos habilitados, combina directamente los audios sin procesar - combine_and_save_audios(audio1_path, audio2_path, output_path, volume_factor_audio1, volume_factor_audio2) - - return i18n("Conversion complete!"), output_path - - - - -def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0,architecture): - infos = [] - if architecture == "VR": - try: - inp_root, save_root_vocal, save_root_ins = [x.strip(" ").strip('"').strip("\n").strip('"').strip(" ") for x in [inp_root, save_root_vocal, save_root_ins]] - usable_files = [os.path.join(inp_root, file) - for file in os.listdir(inp_root) - if file.endswith(tuple(sup_audioext))] - - - pre_fun = MDXNetDereverb(15) if model_name == "onnx_dereverb_By_FoxJoy" else (_audio_pre_ if "DeEcho" not in model_name else _audio_pre_new)( - agg=int(agg), - model_path=os.path.join(weight_uvr5_root, model_name + ".pth"), - device=config.device, - is_half=config.is_half, - ) - - try: - if paths != None: - paths = [path.name for path in paths] - else: - paths = usable_files - - except: - traceback.print_exc() - paths = usable_files - print(paths) - for path in paths: - inp_path = os.path.join(inp_root, path) - need_reformat, done = 1, 0 - - try: - info = ffmpeg.probe(inp_path, cmd="ffprobe") - if info["streams"][0]["channels"] == 2 and info["streams"][0]["sample_rate"] == "44100": - need_reformat = 0 - pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal, format0) - done = 1 - except: - traceback.print_exc() - - if need_reformat: - tmp_path = f"{tmp}/{os.path.basename(RQuote(inp_path))}.reformatted.wav" - os.system(f"ffmpeg -i {RQuote(inp_path)} -vn -acodec pcm_s16le -ac 2 -ar 44100 {RQuote(tmp_path)} -y") - inp_path = tmp_path - - try: - if not done: - pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal, format0) - infos.append(f"{os.path.basename(inp_path)}->Success") - yield "\n".join(infos) - except: - infos.append(f"{os.path.basename(inp_path)}->{traceback.format_exc()}") - yield "\n".join(infos) - except: - infos.append(traceback.format_exc()) - yield "\n".join(infos) - finally: - try: - if model_name == "onnx_dereverb_By_FoxJoy": - del pre_fun.pred.model - del pre_fun.pred.model_ - else: - del pre_fun.model - - del pre_fun - except: traceback.print_exc() + my_applio = "ParityError/Interstellar" - print("clean_empty_cache") - - if torch.cuda.is_available(): torch.cuda.empty_cache() - - yield "\n".join(infos) - elif architecture == "MDX": - try: - infos.append(i18n("Starting audio conversion... (This might take a moment)")) - yield "\n".join(infos) - inp_root, save_root_vocal, save_root_ins = [x.strip(" ").strip('"').strip("\n").strip('"').strip(" ") for x in [inp_root, save_root_vocal, save_root_ins]] - - usable_files = [os.path.join(inp_root, file) - for file in os.listdir(inp_root) - if file.endswith(tuple(sup_audioext))] - try: - if paths != None: - paths = [path.name for path in paths] - else: - paths = usable_files - - except: - traceback.print_exc() - paths = usable_files - print(paths) - invert=True - denoise=True - use_custom_parameter=True - dim_f=3072 - dim_t=256 - n_fft=7680 - use_custom_compensation=True - compensation=1.025 - suffix = "Vocals_custom" #@param ["Vocals", "Drums", "Bass", "Other"]{allow-input: true} - suffix_invert = "Instrumental_custom" #@param ["Instrumental", "Drumless", "Bassless", "Instruments"]{allow-input: true} - print_settings = True # @param{type:"boolean"} - onnx = id_to_ptm(model_name) - compensation = compensation if use_custom_compensation or use_custom_parameter else None - mdx_model = prepare_mdx(onnx,use_custom_parameter, dim_f, dim_t, n_fft, compensation=compensation) - - - for path in paths: - #inp_path = os.path.join(inp_root, path) - suffix_naming = suffix if use_custom_parameter else None - diff_suffix_naming = suffix_invert if use_custom_parameter else None - run_mdx(onnx, mdx_model, path, format0, diff=invert,suffix=suffix_naming,diff_suffix=diff_suffix_naming,denoise=denoise) - - if print_settings: - print() - print('[MDX-Net_Colab settings used]') - print(f'Model used: {onnx}') - print(f'Model MD5: {mdx.MDX.get_hash(onnx)}') - print(f'Model parameters:') - print(f' -dim_f: {mdx_model.dim_f}') - print(f' -dim_t: {mdx_model.dim_t}') - print(f' -n_fft: {mdx_model.n_fft}') - print(f' -compensation: {mdx_model.compensation}') - print() - print('[Input file]') - print('filename(s): ') - for filename in paths: - print(f' -{filename}') - infos.append(f"{os.path.basename(filename)}->Success") - yield "\n".join(infos) - except: - infos.append(traceback.format_exc()) - yield "\n".join(infos) - finally: - try: - del mdx_model - except: traceback.print_exc() - - print("clean_empty_cache") - - if torch.cuda.is_available(): torch.cuda.empty_cache() - - - - - -def change_choices(): - names = [os.path.join(root, file) - for root, _, files in os.walk(weight_root) - for file in files - if file.endswith((".pth", ".onnx"))] - indexes_list = [os.path.join(root, name) for root, _, files in os.walk(index_root, topdown=False) for name in files if name.endswith(".index") and "trained" not in name] - audio_paths = [os.path.join(audio_root, file) for file in os.listdir(os.path.join(now_dir, "audios"))] - - - return ( - {"choices": sorted(names), "__type__": "update"}, - {"choices": sorted(indexes_list), "__type__": "update"}, - {"choices": sorted(audio_paths), "__type__": "update"} - ) -def change_choices2(): - names = [os.path.join(root, file) - for root, _, files in os.walk(weight_root) - for file in files - if file.endswith((".pth", ".onnx"))] - indexes_list = [os.path.join(root, name) for root, _, files in os.walk(index_root, topdown=False) for name in files if name.endswith(".index") and "trained" not in name] - - - return ( - {"choices": sorted(names), "__type__": "update"}, - {"choices": sorted(indexes_list), "__type__": "update"}, - ) -def change_choices3(): - - audio_paths = [os.path.join(audio_root, file) for file in os.listdir(os.path.join(now_dir, "audios"))] - audio_others_paths = [os.path.join(audio_others_root, file) for file in os.listdir(os.path.join(now_dir, "audio-others"))] - - - return ( - {"choices": sorted(audio_others_paths), "__type__": "update"}, - {"choices": sorted(audio_paths), "__type__": "update"} - ) - -def clean(): - return {"value": "", "__type__": "update"} -def export_onnx(): - from infer.modules.onnx.export import export_onnx as eo - - eo() - -sr_dict = { - "32k": 32000, - "40k": 40000, - "48k": 48000, -} - - -def if_done(done, p): - while 1: - if p.poll() is None: - sleep(0.5) - else: - break - done[0] = True - - -def if_done_multi(done, ps): - while 1: - # poll==None代表进程未结束 - # 只要有一个进程未结束都不停 - flag = 1 - for p in ps: - if p.poll() is None: - flag = 0 - sleep(0.5) - break - if flag == 1: - break - done[0] = True - -def formant_enabled( - cbox, qfrency, tmbre, frmntapply, formantpreset, formant_refresh_button -): - if cbox: - DoFormant = True - CSVutil("csvdb/formanting.csv", "w+", "formanting", DoFormant, qfrency, tmbre) - - # print(f"is checked? - {cbox}\ngot {DoFormant}") - - return ( - {"value": True, "__type__": "update"}, - {"visible": True, "__type__": "update"}, - {"visible": True, "__type__": "update"}, - {"visible": True, "__type__": "update"}, - {"visible": True, "__type__": "update"}, - {"visible": True, "__type__": "update"}, - ) - - else: - DoFormant = False - CSVutil("csvdb/formanting.csv", "w+", "formanting", DoFormant, qfrency, tmbre) - - # print(f"is checked? - {cbox}\ngot {DoFormant}") - return ( - {"value": False, "__type__": "update"}, - {"visible": False, "__type__": "update"}, - {"visible": False, "__type__": "update"}, - {"visible": False, "__type__": "update"}, - {"visible": False, "__type__": "update"}, - {"visible": False, "__type__": "update"}, - {"visible": False, "__type__": "update"}, - ) - - -def formant_apply(qfrency, tmbre): - Quefrency = qfrency - Timbre = tmbre - DoFormant = True - CSVutil("csvdb/formanting.csv", "w+", "formanting", DoFormant, qfrency, tmbre) - - return ( - {"value": Quefrency, "__type__": "update"}, - {"value": Timbre, "__type__": "update"}, - ) - -def update_fshift_presets(preset, qfrency, tmbre): - - if preset: - with open(preset, 'r') as p: - content = p.readlines() - qfrency, tmbre = content[0].strip(), content[1] - - formant_apply(qfrency, tmbre) - else: - qfrency, tmbre = preset_apply(preset, qfrency, tmbre) - - return ( - {"choices": get_fshift_presets(), "__type__": "update"}, - {"value": qfrency, "__type__": "update"}, - {"value": tmbre, "__type__": "update"}, - ) - -def preprocess_dataset(trainset_dir, exp_dir, sr, n_p): - sr = sr_dict[sr] - os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True) - f = open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "w") - f.close() - per = 3.0 if config.is_half else 3.7 - cmd = '"%s" infer/modules/train/preprocess.py "%s" %s %s "%s/logs/%s" %s %.1f' % ( - config.python_cmd, - trainset_dir, - sr, - n_p, - now_dir, - exp_dir, - config.noparallel, - per, - ) - logger.info(cmd) - p = Popen(cmd, shell=True) # , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir - ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 - done = [False] - threading.Thread( - target=if_done, - args=( - done, - p, - ), - ).start() - while 1: - with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f: - yield (f.read()) - sleep(1) - if done[0]: - break - with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f: - log = f.read() - logger.info(log) - yield log - - -def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, echl, gpus_rmvpe): - gpus = gpus.split("-") - os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True) - f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w") - f.close() - if if_f0: - if f0method != "rmvpe_gpu": - cmd = ( - '"%s" infer/modules/train/extract/extract_f0_print.py "%s/logs/%s" %s %s' - % ( - config.python_cmd, - now_dir, - exp_dir, - n_p, - f0method, - echl, - ) - ) - logger.info(cmd) - p = Popen( - cmd, shell=True, cwd=now_dir - ) # , stdin=PIPE, stdout=PIPE,stderr=PIPE - ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 - done = [False] - threading.Thread( - target=if_done, - args=( - done, - p, - ), - ).start() - else: - if gpus_rmvpe != "-": - gpus_rmvpe = gpus_rmvpe.split("-") - leng = len(gpus_rmvpe) - ps = [] - for idx, n_g in enumerate(gpus_rmvpe): - cmd = ( - '"%s" infer/modules/train/extract/extract_f0_rmvpe.py %s %s %s "%s/logs/%s" %s ' - % ( - config.python_cmd, - leng, - idx, - n_g, - now_dir, - exp_dir, - config.is_half, - ) - ) - logger.info(cmd) - p = Popen( - cmd, shell=True, cwd=now_dir - ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir - ps.append(p) - ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 - done = [False] - threading.Thread( - target=if_done_multi, # - args=( - done, - ps, - ), - ).start() - else: - cmd = ( - config.python_cmd - + ' infer/modules/train/extract/extract_f0_rmvpe_dml.py "%s/logs/%s" ' - % ( - now_dir, - exp_dir, - ) - ) - logger.info(cmd) - p = Popen( - cmd, shell=True, cwd=now_dir - ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir - p.wait() - done = [True] - while 1: - with open( - "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r" - ) as f: - yield (f.read()) - sleep(1) - if done[0]: - break - with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f: - log = f.read() - logger.info(log) - yield log - ####对不同part分别开多进程 - """ - n_part=int(sys.argv[1]) - i_part=int(sys.argv[2]) - i_gpu=sys.argv[3] - exp_dir=sys.argv[4] - os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu) - """ - leng = len(gpus) - ps = [] - for idx, n_g in enumerate(gpus): - cmd = ( - '"%s" infer/modules/train/extract_feature_print.py %s %s %s %s "%s/logs/%s" %s' - % ( - config.python_cmd, - config.device, - leng, - idx, - n_g, - now_dir, - exp_dir, - version19, - ) +with gr.Blocks(theme=my_applio, title="Applio") as Applio: + gr.Markdown("# Applio") + gr.Markdown("### From the first Applio to the last") + gr.Markdown( + i18n( + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience." ) - logger.info(cmd) - p = Popen( - cmd, shell=True, cwd=now_dir - ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir - ps.append(p) - ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 - done = [False] - threading.Thread( - target=if_done_multi, - args=( - done, - ps, - ), - ).start() - while 1: - with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f: - yield (f.read()) - sleep(1) - if done[0]: - break - with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f: - log = f.read() - logger.info(log) - yield log - -def get_pretrained_models(path_str, f0_str, sr2): - if_pretrained_generator_exist = os.access( - "assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK - ) - if_pretrained_discriminator_exist = os.access( - "assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK - ) - if not if_pretrained_generator_exist: - logger.warn( - "assets/pretrained%s/%sG%s.pth not exist, will not use pretrained model", - path_str, - f0_str, - sr2, - ) - if not if_pretrained_discriminator_exist: - logger.warn( - "assets/pretrained%s/%sD%s.pth not exist, will not use pretrained model", - path_str, - f0_str, - sr2, - ) - return ( - "assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2) - if if_pretrained_generator_exist - else "", - "assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2) - if if_pretrained_discriminator_exist - else "", - ) - -def change_sr2(sr2, if_f0_3, version19): - path_str = "" if version19 == "v1" else "_v2" - f0_str = "f0" if if_f0_3 else "" - return get_pretrained_models(path_str, f0_str, sr2) - - -def change_version19(sr2, if_f0_3, version19): - path_str = "" if version19 == "v1" else "_v2" - if sr2 == "32k" and version19 == "v1": - sr2 = "40k" - to_return_sr2 = ( - {"choices": ["40k", "48k"], "__type__": "update", "value": sr2} - if version19 == "v1" - else {"choices": ["40k", "48k", "32k"], "__type__": "update", "value": sr2} - ) - f0_str = "f0" if if_f0_3 else "" - return ( - *get_pretrained_models(path_str, f0_str, sr2), - to_return_sr2, - ) - - -def change_f0(if_f0_3, sr2, version19): # f0method8,pretrained_G14,pretrained_D15 - path_str = "" if version19 == "v1" else "_v2" - return ( - {"visible": if_f0_3, "__type__": "update"}, - *get_pretrained_models(path_str, "f0", sr2), - ) - - -global log_interval - -def set_log_interval(exp_dir, batch_size12): - log_interval = 1 - folder_path = os.path.join(exp_dir, "1_16k_wavs") - - if os.path.isdir(folder_path): - wav_files_num = len(glob1(folder_path,"*.wav")) - - if wav_files_num > 0: - log_interval = math.ceil(wav_files_num / batch_size12) - if log_interval > 1: - log_interval += 1 - - return log_interval - -global PID, PROCESS - -def click_train( - exp_dir1, - sr2, - if_f0_3, - spk_id5, - save_epoch10, - total_epoch11, - batch_size12, - if_save_latest13, - pretrained_G14, - pretrained_D15, - gpus16, - if_cache_gpu17, - if_save_every_weights18, - version19, -): - CSVutil("csvdb/stop.csv", "w+", "formanting", False) - # 生成filelist - exp_dir = "%s/logs/%s" % (now_dir, exp_dir1) - os.makedirs(exp_dir, exist_ok=True) - gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir) - feature_dir = ( - "%s/3_feature256" % (exp_dir) - if version19 == "v1" - else "%s/3_feature768" % (exp_dir) ) - if if_f0_3: - f0_dir = "%s/2a_f0" % (exp_dir) - f0nsf_dir = "%s/2b-f0nsf" % (exp_dir) - names = ( - set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) - & set([name.split(".")[0] for name in os.listdir(feature_dir)]) - & set([name.split(".")[0] for name in os.listdir(f0_dir)]) - & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)]) - ) - else: - names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set( - [name.split(".")[0] for name in os.listdir(feature_dir)] - ) - opt = [] - for name in names: - if if_f0_3: - opt.append( - "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s" - % ( - gt_wavs_dir.replace("\\", "\\\\"), - name, - feature_dir.replace("\\", "\\\\"), - name, - f0_dir.replace("\\", "\\\\"), - name, - f0nsf_dir.replace("\\", "\\\\"), - name, - spk_id5, - ) - ) - else: - opt.append( - "%s/%s.wav|%s/%s.npy|%s" - % ( - gt_wavs_dir.replace("\\", "\\\\"), - name, - feature_dir.replace("\\", "\\\\"), - name, - spk_id5, - ) - ) - fea_dim = 256 if version19 == "v1" else 768 - if if_f0_3: - for _ in range(2): - opt.append( - "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s" - % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5) - ) - else: - for _ in range(2): - opt.append( - "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s" - % (now_dir, sr2, now_dir, fea_dim, spk_id5) - ) - shuffle(opt) - with open("%s/filelist.txt" % exp_dir, "w") as f: - f.write("\n".join(opt)) - logger.debug("Write filelist done") - # 生成config#无需生成config - # cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0" - logger.info("Use gpus: %s", str(gpus16)) - if pretrained_G14 == "": - logger.info("No pretrained Generator") - if pretrained_D15 == "": - logger.info("No pretrained Discriminator") - if version19 == "v1" or sr2 == "40k": - config_path = "v1/%s.json" % sr2 - else: - config_path = "v2/%s.json" % sr2 - config_save_path = os.path.join(exp_dir, "config.json") - if not pathlib.Path(config_save_path).exists(): - with open(config_save_path, "w", encoding="utf-8") as f: - json.dump( - config.json_config[config_path], - f, - ensure_ascii=False, - indent=4, - sort_keys=True, - ) - f.write("\n") - if gpus16: - cmd = ( - '"%s" infer/modules/train/train.py -e "%s" -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s' - % ( - config.python_cmd, - exp_dir1, - sr2, - 1 if if_f0_3 else 0, - batch_size12, - gpus16, - total_epoch11, - save_epoch10, - "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "", - "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "", - 1 if if_save_latest13 == True else 0, - 1 if if_cache_gpu17 == True else 0, - 1 if if_save_every_weights18 == True else 0, - version19, - ) + gr.Markdown( + i18n( + "[Support](https://discord.gg/IAHispano) — [Discord Bot](https://discord.com/oauth2/authorize?client_id=1144714449563955302&permissions=1376674695271&scope=bot%20applications.commands) — [Find Voices](https://applio.org/models) — [GitHub](https://github.com/IAHispano/Applio)" ) - else: - cmd = ( - '"%s" infer/modules/train/train.py -e "%s" -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s' - % ( - config.python_cmd, - exp_dir1, - sr2, - 1 if if_f0_3 else 0, - batch_size12, - total_epoch11, - save_epoch10, - "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "", - "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "", - 1 if if_save_latest13 == True else 0, - 1 if if_cache_gpu17 == True else 0, - 1 if if_save_every_weights18 == True else 0, - version19, - ) - ) - logger.info(cmd) - global p - p = Popen(cmd, shell=True, cwd=now_dir) - global PID - PID = p.pid - - p.wait() - - return i18n("Training is done, check train.log"), {"visible": False, "__type__": "update"}, {"visible": True, "__type__": "update"} - - -def train_index(exp_dir1, version19): - # exp_dir = "%s/logs/%s" % (now_dir, exp_dir1) - exp_dir = "logs/%s" % (exp_dir1) - os.makedirs(exp_dir, exist_ok=True) - feature_dir = ( - "%s/3_feature256" % (exp_dir) - if version19 == "v1" - else "%s/3_feature768" % (exp_dir) ) - if not os.path.exists(feature_dir): - return "请先进行特征提取!" - listdir_res = list(os.listdir(feature_dir)) - if len(listdir_res) == 0: - return "请先进行特征提取!" - infos = [] - npys = [] - for name in sorted(listdir_res): - phone = np.load("%s/%s" % (feature_dir, name)) - npys.append(phone) - big_npy = np.concatenate(npys, 0) - big_npy_idx = np.arange(big_npy.shape[0]) - np.random.shuffle(big_npy_idx) - big_npy = big_npy[big_npy_idx] - if big_npy.shape[0] > 2e5: - infos.append("Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0]) - yield "\n".join(infos) - try: - big_npy = ( - MiniBatchKMeans( - n_clusters=10000, - verbose=True, - batch_size=256 * config.n_cpu, - compute_labels=False, - init="random", - ) - .fit(big_npy) - .cluster_centers_ - ) - except: - info = traceback.format_exc() - logger.info(info) - infos.append(info) - yield "\n".join(infos) + with gr.Tab(i18n("Inference")): + inference_tab() - np.save("%s/total_fea.npy" % exp_dir, big_npy) - n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39) - infos.append("%s,%s" % (big_npy.shape, n_ivf)) - yield "\n".join(infos) - index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf) - # index = faiss.index_factory(256if version19=="v1"else 768, "IVF%s,PQ128x4fs,RFlat"%n_ivf) - infos.append("training") - yield "\n".join(infos) - index_ivf = faiss.extract_index_ivf(index) # - index_ivf.nprobe = 1 - index.train(big_npy) - faiss.write_index( - index, - "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index" - % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), - ) - - infos.append("adding") - yield "\n".join(infos) - batch_size_add = 8192 - for i in range(0, big_npy.shape[0], batch_size_add): - index.add(big_npy[i : i + batch_size_add]) - faiss.write_index( - index, - "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index" - % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), - ) - infos.append( - "Successful Index Construction,added_IVF%s_Flat_nprobe_%s_%s_%s.index" - % (n_ivf, index_ivf.nprobe, exp_dir1, version19) - ) - # faiss.write_index(index, '%s/added_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19)) - # infos.append("成功构建索引,added_IVF%s_Flat_FastScan_%s.index"%(n_ivf,version19)) - yield "\n".join(infos) + with gr.Tab(i18n("TTS")): + tts_tab() -def change_info_(ckpt_path): - if not os.path.exists(ckpt_path.replace(os.path.basename(ckpt_path), "train.log")): - return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"} - try: - with open( - ckpt_path.replace(os.path.basename(ckpt_path), "train.log"), "r" - ) as f: - info = eval(f.read().strip("\n").split("\n")[0].split("\t")[-1]) - sr, f0 = info["sample_rate"], info["if_f0"] - version = "v2" if ("version" in info and info["version"] == "v2") else "v1" - return sr, str(f0), version - except: - traceback.print_exc() - return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"} + with gr.Tab(i18n("Download")): + download_tab() -F0GPUVisible = config.dml == False +def launch_gradio(): + Applio.launch() -def change_f0_method(f0method8): - if f0method8 == "rmvpe_gpu": - visible = F0GPUVisible - else: - visible = False - return {"visible": visible, "__type__": "update"} - - - -def export_onnx(model_path, exported_path): - device = torch.device("cpu") - checkpoint = torch.load(model_path, map_location=device) - vec_channels = 256 if checkpoint.get("version", "v1") == "v1" else 768 - - test_inputs = { - "phone": torch.rand(1, 200, vec_channels), - "phone_lengths": torch.LongTensor([200]), - "pitch": torch.randint(5, 255, (1, 200)), - "pitchf": torch.rand(1, 200), - "ds": torch.zeros(1).long(), - "rnd": torch.rand(1, 192, 200) - } - - checkpoint["config"][-3] = checkpoint["weight"]["emb_g.weight"].shape[0] - net_g = SynthesizerTrnMsNSFsidM(*checkpoint["config"], is_half=False, version=checkpoint.get("version", "v1")) - - net_g.load_state_dict(checkpoint["weight"], strict=False) - net_g = net_g.to(device) - - dynamic_axes = {"phone": [1], "pitch": [1], "pitchf": [1], "rnd": [2]} - - torch.onnx.export( - net_g, - tuple(value.to(device) for value in test_inputs.values()), - exported_path, - dynamic_axes=dynamic_axes, - do_constant_folding=False, - opset_version=13, - verbose=False, - input_names=list(test_inputs.keys()), - output_names=["audio"], - ) - return "Finished" - - - -import re as regex -import scipy.io.wavfile as wavfile - -cli_current_page = "HOME" - - -def cli_split_command(com): - exp = r'(?:(?<=\s)|^)"(.*?)"(?=\s|$)|(\S+)' - split_array = regex.findall(exp, com) - split_array = [group[0] if group[0] else group[1] for group in split_array] - return split_array - - -def execute_generator_function(genObject): - for _ in genObject: - pass - - -def cli_infer(com): - # get VC first - com = cli_split_command(com) - model_name = com[0] - source_audio_path = com[1] - output_file_name = com[2] - feature_index_path = com[3] - f0_file = None # Not Implemented Yet - - # Get parameters for inference - speaker_id = int(com[4]) - transposition = float(com[5]) - f0_method = com[6] - crepe_hop_length = int(com[7]) - harvest_median_filter = int(com[8]) - resample = int(com[9]) - mix = float(com[10]) - feature_ratio = float(com[11]) - protection_amnt = float(com[12]) - protect1 = 0.5 - - if com[14] == "False" or com[14] == "false": - DoFormant = False - Quefrency = 0.0 - Timbre = 0.0 - CSVutil( - "csvdb/formanting.csv", "w+", "formanting", DoFormant, Quefrency, Timbre - ) - - else: - DoFormant = True - Quefrency = float(com[15]) - Timbre = float(com[16]) - CSVutil( - "csvdb/formanting.csv", "w+", "formanting", DoFormant, Quefrency, Timbre - ) - - print("Mangio-RVC-Fork Infer-CLI: Starting the inference...") - vc_data = vc.get_vc(model_name, protection_amnt, protect1) - print(vc_data) - print("Mangio-RVC-Fork Infer-CLI: Performing inference...") - conversion_data = vc.vc_single( - speaker_id, - source_audio_path, - source_audio_path, - transposition, - f0_file, - f0_method, - feature_index_path, - feature_index_path, - feature_ratio, - harvest_median_filter, - resample, - mix, - protection_amnt, - crepe_hop_length, - ) - if "Success." in conversion_data[0]: - print( - "Mangio-RVC-Fork Infer-CLI: Inference succeeded. Writing to %s/%s..." - % ("audio-outputs", output_file_name) - ) - wavfile.write( - "%s/%s" % ("audio-outputs", output_file_name), - conversion_data[1][0], - conversion_data[1][1], - ) - print( - "Mangio-RVC-Fork Infer-CLI: Finished! Saved output to %s/%s" - % ("audio-outputs", output_file_name) - ) - else: - print("Mangio-RVC-Fork Infer-CLI: Inference failed. Here's the traceback: ") - print(conversion_data[0]) - - -def cli_pre_process(com): - com = cli_split_command(com) - model_name = com[0] - trainset_directory = com[1] - sample_rate = com[2] - num_processes = int(com[3]) - - print("Mangio-RVC-Fork Pre-process: Starting...") - generator = preprocess_dataset( - trainset_directory, model_name, sample_rate, num_processes - ) - execute_generator_function(generator) - print("Mangio-RVC-Fork Pre-process: Finished") - - -def cli_extract_feature(com): - com = cli_split_command(com) - model_name = com[0] - gpus = com[1] - num_processes = int(com[2]) - has_pitch_guidance = True if (int(com[3]) == 1) else False - f0_method = com[4] - crepe_hop_length = int(com[5]) - version = com[6] # v1 or v2 - - print("Mangio-RVC-CLI: Extract Feature Has Pitch: " + str(has_pitch_guidance)) - print("Mangio-RVC-CLI: Extract Feature Version: " + str(version)) - print("Mangio-RVC-Fork Feature Extraction: Starting...") - generator = extract_f0_feature( - gpus, - num_processes, - f0_method, - has_pitch_guidance, - model_name, - version, - crepe_hop_length, - ) - execute_generator_function(generator) - print("Mangio-RVC-Fork Feature Extraction: Finished") - - -def cli_train(com): - com = cli_split_command(com) - model_name = com[0] - sample_rate = com[1] - has_pitch_guidance = True if (int(com[2]) == 1) else False - speaker_id = int(com[3]) - save_epoch_iteration = int(com[4]) - total_epoch = int(com[5]) # 10000 - batch_size = int(com[6]) - gpu_card_slot_numbers = com[7] - if_save_latest = True if (int(com[8]) == 1) else False - if_cache_gpu = True if (int(com[9]) == 1) else False - if_save_every_weight = True if (int(com[10]) == 1) else False - version = com[11] - - pretrained_base = "pretrained/" if version == "v1" else "pretrained_v2/" - - g_pretrained_path = "%sf0G%s.pth" % (pretrained_base, sample_rate) - d_pretrained_path = "%sf0D%s.pth" % (pretrained_base, sample_rate) - - print("Mangio-RVC-Fork Train-CLI: Training...") - click_train( - model_name, - sample_rate, - has_pitch_guidance, - speaker_id, - save_epoch_iteration, - total_epoch, - batch_size, - if_save_latest, - g_pretrained_path, - d_pretrained_path, - gpu_card_slot_numbers, - if_cache_gpu, - if_save_every_weight, - version, - ) - - -def cli_train_feature(com): - com = cli_split_command(com) - model_name = com[0] - version = com[1] - print("Mangio-RVC-Fork Train Feature Index-CLI: Training... Please wait") - generator = train_index(model_name, version) - execute_generator_function(generator) - print("Mangio-RVC-Fork Train Feature Index-CLI: Done!") - - -def cli_extract_model(com): - com = cli_split_command(com) - model_path = com[0] - save_name = com[1] - sample_rate = com[2] - has_pitch_guidance = com[3] - info = com[4] - version = com[5] - extract_small_model_process = extract_small_model( - model_path, save_name, sample_rate, has_pitch_guidance, info, version - ) - if extract_small_model_process == "Success.": - print("Mangio-RVC-Fork Extract Small Model: Success!") - else: - print(str(extract_small_model_process)) - print("Mangio-RVC-Fork Extract Small Model: Failed!") - - -def preset_apply(preset, qfer, tmbr): - if str(preset) != "": - with open(str(preset), "r") as p: - content = p.readlines() - qfer, tmbr = content[0].split("\n")[0], content[1] - formant_apply(qfer, tmbr) - else: - pass - return ( - {"value": qfer, "__type__": "update"}, - {"value": tmbr, "__type__": "update"}, - ) - - -def print_page_details(): - if cli_current_page == "HOME": - print( - "\n go home : Takes you back to home with a navigation list." - "\n go infer : Takes you to inference command execution." - "\n go pre-process : Takes you to training step.1) pre-process command execution." - "\n go extract-feature : Takes you to training step.2) extract-feature command execution." - "\n go train : Takes you to training step.3) being or continue training command execution." - "\n go train-feature : Takes you to the train feature index command execution." - "\n go extract-model : Takes you to the extract small model command execution." - ) - elif cli_current_page == "INFER": - print( - "\n arg 1) model name with .pth in ./weights: mi-test.pth" - "\n arg 2) source audio path: myFolder\\MySource.wav" - "\n arg 3) output file name to be placed in './audio-outputs': MyTest.wav" - "\n arg 4) feature index file path: logs/mi-test/added_IVF3042_Flat_nprobe_1.index" - "\n arg 5) speaker id: 0" - "\n arg 6) transposition: 0" - "\n arg 7) f0 method: harvest (pm, harvest, crepe, crepe-tiny, hybrid[x,x,x,x], mangio-crepe, mangio-crepe-tiny, rmvpe)" - "\n arg 8) crepe hop length: 160" - "\n arg 9) harvest median filter radius: 3 (0-7)" - "\n arg 10) post resample rate: 0" - "\n arg 11) mix volume envelope: 1" - "\n arg 12) feature index ratio: 0.78 (0-1)" - "\n arg 13) Voiceless Consonant Protection (Less Artifact): 0.33 (Smaller number = more protection. 0.50 means Dont Use.)" - "\n arg 14) Whether to formant shift the inference audio before conversion: False (if set to false, you can ignore setting the quefrency and timbre values for formanting)" - "\n arg 15)* Quefrency for formanting: 8.0 (no need to set if arg14 is False/false)" - "\n arg 16)* Timbre for formanting: 1.2 (no need to set if arg14 is False/false) \n" - "\nExample: mi-test.pth saudio/Sidney.wav myTest.wav logs/mi-test/added_index.index 0 -2 harvest 160 3 0 1 0.95 0.33 0.45 True 8.0 1.2" - ) - elif cli_current_page == "PRE-PROCESS": - print( - "\n arg 1) Model folder name in ./logs: mi-test" - "\n arg 2) Trainset directory: mydataset (or) E:\\my-data-set" - "\n arg 3) Sample rate: 40k (32k, 40k, 48k)" - "\n arg 4) Number of CPU threads to use: 8 \n" - "\nExample: mi-test mydataset 40k 24" - ) - elif cli_current_page == "EXTRACT-FEATURE": - print( - "\n arg 1) Model folder name in ./logs: mi-test" - "\n arg 2) Gpu card slot: 0 (0-1-2 if using 3 GPUs)" - "\n arg 3) Number of CPU threads to use: 8" - "\n arg 4) Has Pitch Guidance?: 1 (0 for no, 1 for yes)" - "\n arg 5) f0 Method: harvest (pm, harvest, dio, crepe)" - "\n arg 6) Crepe hop length: 128" - "\n arg 7) Version for pre-trained models: v2 (use either v1 or v2)\n" - "\nExample: mi-test 0 24 1 harvest 128 v2" - ) - elif cli_current_page == "TRAIN": - print( - "\n arg 1) Model folder name in ./logs: mi-test" - "\n arg 2) Sample rate: 40k (32k, 40k, 48k)" - "\n arg 3) Has Pitch Guidance?: 1 (0 for no, 1 for yes)" - "\n arg 4) speaker id: 0" - "\n arg 5) Save epoch iteration: 50" - "\n arg 6) Total epochs: 10000" - "\n arg 7) Batch size: 8" - "\n arg 8) Gpu card slot: 0 (0-1-2 if using 3 GPUs)" - "\n arg 9) Save only the latest checkpoint: 0 (0 for no, 1 for yes)" - "\n arg 10) Whether to cache training set to vram: 0 (0 for no, 1 for yes)" - "\n arg 11) Save extracted small model every generation?: 0 (0 for no, 1 for yes)" - "\n arg 12) Model architecture version: v2 (use either v1 or v2)\n" - "\nExample: mi-test 40k 1 0 50 10000 8 0 0 0 0 v2" - ) - elif cli_current_page == "TRAIN-FEATURE": - print( - "\n arg 1) Model folder name in ./logs: mi-test" - "\n arg 2) Model architecture version: v2 (use either v1 or v2)\n" - "\nExample: mi-test v2" - ) - elif cli_current_page == "EXTRACT-MODEL": - print( - "\n arg 1) Model Path: logs/mi-test/G_168000.pth" - "\n arg 2) Model save name: MyModel" - "\n arg 3) Sample rate: 40k (32k, 40k, 48k)" - "\n arg 4) Has Pitch Guidance?: 1 (0 for no, 1 for yes)" - '\n arg 5) Model information: "My Model"' - "\n arg 6) Model architecture version: v2 (use either v1 or v2)\n" - '\nExample: logs/mi-test/G_168000.pth MyModel 40k 1 "Created by Cole Mangio" v2' - ) - -def change_page(page): - global cli_current_page - cli_current_page = page - return 0 - -def execute_command(com): - if com == "go home": - return change_page("HOME") - elif com == "go infer": - return change_page("INFER") - elif com == "go pre-process": - return change_page("PRE-PROCESS") - elif com == "go extract-feature": - return change_page("EXTRACT-FEATURE") - elif com == "go train": - return change_page("TRAIN") - elif com == "go train-feature": - return change_page("TRAIN-FEATURE") - elif com == "go extract-model": - return change_page("EXTRACT-MODEL") - else: - if com[:3] == "go ": - print("page '%s' does not exist!" % com[3:]) - return 0 - - if cli_current_page == "INFER": - cli_infer(com) - elif cli_current_page == "PRE-PROCESS": - cli_pre_process(com) - elif cli_current_page == "EXTRACT-FEATURE": - cli_extract_feature(com) - elif cli_current_page == "TRAIN": - cli_train(com) - elif cli_current_page == "TRAIN-FEATURE": - cli_train_feature(com) - elif cli_current_page == "EXTRACT-MODEL": - cli_extract_model(com) - -def cli_navigation_loop(): - while True: - print("\nYou are currently in '%s':" % cli_current_page) - print_page_details() - command = input("%s: " % cli_current_page) - try: - execute_command(command) - except: - print(traceback.format_exc()) - - -if config.is_cli: - print("\n\nMangio-RVC-Fork v2 CLI App!\n") - print( - "Welcome to the CLI version of RVC. Please read the documentation on https://github.com/Mangio621/Mangio-RVC-Fork (README.MD) to understand how to use this app.\n" - ) - cli_navigation_loop() - - - - - -def switch_pitch_controls(f0method0): - is_visible = f0method0 != 'rmvpe' - - if rvc_globals.NotesOrHertz: - return ( - {"visible": False, "__type__": "update"}, - {"visible": is_visible, "__type__": "update"}, - {"visible": False, "__type__": "update"}, - {"visible": is_visible, "__type__": "update"} - ) - else: - return ( - {"visible": is_visible, "__type__": "update"}, - {"visible": False, "__type__": "update"}, - {"visible": is_visible, "__type__": "update"}, - {"visible": False, "__type__": "update"} - ) - -def match_index(sid0): - picked = False - # folder = sid0.split('.')[0] - - # folder = re.split(r'. |_', sid0)[0] - folder = sid0.split(".")[0].split("_")[0] - # folder_test = sid0.split('.')[0].split('_')[0].split('-')[0] - parent_dir = "./logs/" + folder - # print(parent_dir) - if os.path.exists(parent_dir): - # print('path exists') - for filename in os.listdir(parent_dir.replace("\\", "/")): - if filename.endswith(".index"): - for i in range(len(indexes_list)): - if indexes_list[i] == ( - os.path.join(("./logs/" + folder), filename).replace("\\", "/") - ): - # print('regular index found') - break - else: - if indexes_list[i] == ( - os.path.join( - ("./logs/" + folder.lower()), filename - ).replace("\\", "/") - ): - # print('lowered index found') - parent_dir = "./logs/" + folder.lower() - break - # elif (indexes_list[i]).casefold() == ((os.path.join(("./logs/" + folder), filename).replace('\\','/')).casefold()): - # print('8') - # parent_dir = "./logs/" + folder.casefold() - # break - # elif (indexes_list[i]) == ((os.path.join(("./logs/" + folder_test), filename).replace('\\','/'))): - # parent_dir = "./logs/" + folder_test - # print(parent_dir) - # break - # elif (indexes_list[i]) == (os.path.join(("./logs/" + folder_test.lower()), filename).replace('\\','/')): - # parent_dir = "./logs/" + folder_test - # print(parent_dir) - # break - # else: - # #print('couldnt find index') - # continue - - # print('all done') - index_path = os.path.join( - parent_dir.replace("\\", "/"), filename.replace("\\", "/") - ).replace("\\", "/") - # print(index_path) - return (index_path, index_path) - - else: - # print('nothing found') - return ("", "") - -def stoptraining(mim): - if int(mim) == 1: - CSVutil("csvdb/stop.csv", "w+", "stop", "True") - # p.terminate() - # p.kill() - try: - os.kill(PID, signal.SIGTERM) - except Exception as e: - print(f"Couldn't click due to {e}") - pass - else: - pass - - return ( - {"visible": False, "__type__": "update"}, - {"visible": True, "__type__": "update"}, - ) - -weights_dir = 'weights/' - -def note_to_hz(note_name): - SEMITONES = {'C': -9, 'C#': -8, 'D': -7, 'D#': -6, 'E': -5, 'F': -4, 'F#': -3, 'G': -2, 'G#': -1, 'A': 0, 'A#': 1, 'B': 2} - pitch_class, octave = note_name[:-1], int(note_name[-1]) - semitone = SEMITONES[pitch_class] - note_number = 12 * (octave - 4) + semitone - frequency = 440.0 * (2.0 ** (1.0/12)) ** note_number - return frequency - -def save_to_wav(record_button): - if record_button is None: - pass - else: - path_to_file=record_button - new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")+'.wav' - new_path='./audios/'+new_name - shutil.move(path_to_file,new_path) - return new_name -def save_to_wav2_edited(dropbox): - if dropbox is None: - pass - else: - file_path = dropbox.name - target_path = os.path.join('audios', os.path.basename(file_path)) - - if os.path.exists(target_path): - os.remove(target_path) - print('Replacing old dropdown file...') - - shutil.move(file_path, target_path) - return -def save_to_wav2(dropbox): - file_path = dropbox.name - target_path = os.path.join('audios', os.path.basename(file_path)) - - if os.path.exists(target_path): - os.remove(target_path) - print('Replacing old dropdown file...') - - shutil.move(file_path, target_path) - return target_path - -from gtts import gTTS -import edge_tts -import asyncio - - - - -def custom_voice( - _values, # filter indices - audio_files, # all audio files - model_voice_path='', - transpose=0, - f0method='pm', - index_rate_=float(0.66), - crepe_hop_length_=float(64), - f0_autotune=False, - file_index='', - file_index2='', - ): - - vc.get_vc(model_voice_path) - - - for _value_item in _values: - filename = "audio2/"+audio_files[_value_item] if _value_item != "converted_tts" else audio_files[0] - #filename = "audio2/"+audio_files[_value_item] - try: - print(audio_files[_value_item], model_voice_path) - except: - pass - info_, (sample_, audio_output_) = vc.vc_single_dont_save( - sid=0, - input_audio_path0=filename, #f"audio2/{filename}", - input_audio_path1=filename, #f"audio2/{filename}", - f0_up_key=transpose, # transpose for m to f and reverse 0 12 - f0_file=None, - f0_method= f0method, - file_index= file_index, # dir pwd? - file_index2= file_index2, - # file_big_npy1, - index_rate= index_rate_, - filter_radius= int(3), - resample_sr= int(0), - rms_mix_rate= float(0.25), - protect= float(0.33), - crepe_hop_length= crepe_hop_length_, - f0_autotune=f0_autotune, - f0_min=50, - note_min=50, - f0_max=1100, - note_max=1100 - ) - - sf.write( - file= filename, #f"audio2/{filename}", - samplerate=sample_, - data=audio_output_ - ) -def cast_to_device(tensor, device): - try: - return tensor.to(device) - except Exception as e: - print(e) - return tensor - - -def __bark__(text, voice_preset): - os.makedirs(os.path.join(now_dir,"tts"), exist_ok=True) - from transformers import AutoProcessor, BarkModel - device = "cuda:0" if torch.cuda.is_available() else "cpu" - dtype = torch.float32 if "cpu" in device else torch.float16 - bark_processor = AutoProcessor.from_pretrained( - "suno/bark-small", - cache_dir=os.path.join(now_dir,"tts","suno/bark"), - torch_dtype=dtype) - bark_model = BarkModel.from_pretrained( - "suno/bark-small", - cache_dir=os.path.join(now_dir,"tts","suno/bark"), - torch_dtype=dtype).to(device) - # bark_model.enable_cpu_offload() - inputs = bark_processor( - text=[text], - return_tensors="pt", - voice_preset=voice_preset - ) - tensor_dict = {k: cast_to_device(v,device) if hasattr(v,"to") else v for k, v in inputs.items()} - speech_values = bark_model.generate(**tensor_dict, do_sample=True) - sampling_rate = bark_model.generation_config.sample_rate - speech = speech_values.cpu().numpy().squeeze() - return speech, sampling_rate - - - -def make_test( - tts_text, - tts_voice, - model_path, - index_path, - transpose, - f0_method, - index_rate, - crepe_hop_length, - f0_autotune, - tts_method - ): - - if tts_voice == None: - return - - filename = os.path.join(now_dir, "audio-outputs", "converted_tts.wav") - if "SET_LIMIT" == os.getenv("DEMO"): - if len(tts_text) > 60: - tts_text = tts_text[:60] - print("DEMO; limit to 60 characters") - - language = tts_voice[:2] - if tts_method == "Edge-tts": - try: - #nest_asyncio.apply() # gradio;not - asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save(filename)) - except: - try: - tts = gTTS(tts_text, lang=language) - tts.save(filename) - tts.save - print(f'No audio was received. Please change the tts voice for {tts_voice}. USING gTTS.') - except: - tts = gTTS('a', lang=language) - tts.save(filename) - print('Error: Audio will be replaced.') - - os.system("cp audio-outputs/converted_tts.wav audio-outputs/real_tts.wav") - - custom_voice( - ["converted_tts"], # filter indices - ["audio-outputs/converted_tts.wav"], # all audio files - model_voice_path=model_path, - transpose=transpose, - f0method=f0_method, - index_rate_=index_rate, - crepe_hop_length_=crepe_hop_length, - f0_autotune=f0_autotune, - file_index='', - file_index2=index_path, - ) - return os.path.join(now_dir, "audio-outputs", "converted_tts.wav"), os.path.join(now_dir, "audio-outputs", "real_tts.wav") - elif tts_method == "Bark-tts": - try: - - script = tts_text.replace("\n", " ").strip() - sentences = sent_tokenize(script) - print(sentences) - silence = np.zeros(int(0.25 * SAMPLE_RATE)) - pieces = [] - nombre_archivo = os.path.join(now_dir, "audio-outputs", "bark_out.wav") - for sentence in sentences: - audio_array , _ = __bark__(sentence, tts_voice.split("-")[0]) - pieces += [audio_array, silence.copy()] - - sf.write( - file= nombre_archivo, - samplerate=SAMPLE_RATE, - data=np.concatenate(pieces) - ) - vc.get_vc(model_path) - info_, (sample_, audio_output_) = vc.vc_single_dont_save( - sid=0, - input_audio_path0=os.path.join(now_dir, "audio-outputs", "bark_out.wav"), #f"audio2/{filename}", - input_audio_path1=os.path.join(now_dir, "audio-outputs", "bark_out.wav"), #f"audio2/{filename}", - f0_up_key=transpose, # transpose for m to f and reverse 0 12 - f0_file=None, - f0_method=f0_method, - file_index= '', # dir pwd? - file_index2= index_path, - # file_big_npy1, - index_rate= index_rate, - filter_radius= int(3), - resample_sr= int(0), - rms_mix_rate= float(0.25), - protect= float(0.33), - crepe_hop_length= crepe_hop_length, - f0_autotune=f0_autotune, - f0_min=50, - note_min=50, - f0_max=1100, - note_max=1100 - ) - wavfile.write(os.path.join(now_dir, "audio-outputs", "converted_bark.wav"), rate=sample_, data=audio_output_) - return os.path.join(now_dir, "audio-outputs", "converted_bark.wav"), nombre_archivo - - except Exception as e: - print(f"{e}") - return None, None - - - - - - -def GradioSetup(UTheme=gr.themes.Soft()): - - default_weight = names[0] if names else '' - - with gr.Blocks(theme='JohnSmith9982/small_and_pretty', title="Applio") as app: - gr.Markdown("🍏 Applio (Mangio-RVC-Fork HF)") - gr.Markdown("More spaces: [Aesthetic_RVC_Inference_HF](https://huggingface.co/spaces/r3gm/Aesthetic_RVC_Inference_HF), [AICoverGen](https://huggingface.co/spaces/r3gm/AICoverGen), [Ultimate-Vocal-Remover-WebUI](https://huggingface.co/spaces/r3gm/Ultimate-Vocal-Remover-WebUI), [Advanced-RVC-Inference](https://huggingface.co/spaces/r3gm/Advanced-RVC-Inference)") - gr.HTML("