import os
import sys
import traceback

# MPS tuning knobs for PyTorch; they are placed in the environment before
# torch is imported further down.
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"

# Positional command line, two accepted layouts:
#   7 entries : script device n_part i_part exp_dir version is_half
#   otherwise : script device n_part i_part i_gpu exp_dir version is_half
device = sys.argv[1]
n_part, i_part = int(sys.argv[2]), int(sys.argv[3])
if len(sys.argv) != 7:
    i_gpu = sys.argv[4]
    exp_dir = sys.argv[5]
    # Pin the process to the requested GPU. This is done ahead of the torch
    # import below (presumably so CUDA only ever sees that device).
    os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu)
    version = sys.argv[6]
    is_half = sys.argv[7].lower() == "true"
else:
    exp_dir = sys.argv[4]
    version = sys.argv[5]
    is_half = sys.argv[6].lower() == "true"

# Heavy imports are intentionally kept after the environment setup above.
import fairseq
import numpy as np
import soundfile as sf
import torch
import torch.nn.functional as F
# Resolve the compute device. Unless a DirectML ("privateuseone") device was
# requested on the command line, the requested name is ignored and the best
# available backend is picked: CUDA, then MPS, then CPU.
# NOTE(review): the original indentation was lost; the DirectML patch below is
# assumed to belong to the "privateuseone" branch only — confirm.
if "privateuseone" in device:
    import torch_directml

    device = torch_directml.device(torch_directml.default_device())

    def forward_dml(ctx, x, scale):
        """Replacement for ``GradMultiply.forward`` used with DirectML.

        Stores ``scale`` on ``ctx`` and returns a detached clone of ``x``.
        """
        ctx.scale = scale
        return x.clone().detach()

    fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml
elif torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
# Make sure the per-experiment log file exists (append mode creates it when
# missing). Nothing in this script writes through this handle, so it is
# closed immediately instead of being left open for the whole run. The name
# ``f`` stays bound (to the closed file) for backward compatibility.
with open("%s/extract_f0_feature.log" % exp_dir, "a+") as f:
    pass
def printt(strr, log_path="/content/log.txt"):
    """Print a message and append it as one line to a log file.

    Args:
        strr: Message to emit. Any object is accepted; it is rendered the
            same way ``print`` renders it.
        log_path: File to append to, opened as UTF-8. Defaults to the path
            this script has always written to.
    """
    print(strr)
    with open(log_path, "a", encoding="utf-8") as log_file:
        # An f-string is used instead of ``"%s\n" % strr`` because the
        # %-operator treats a tuple argument as a list of format arguments
        # and raises TypeError for anything but a 1-tuple.
        log_file.write(f"{strr}\n")
# Record the exact command line in the log.
printt(" ".join(sys.argv))
model_path = "assets/hubert/hubert_base.pt"

printt("exp_dir: " + exp_dir)

# Input wavs live under the experiment directory; the output directory name
# depends on the model version ("v1" -> 3_feature256, anything else ->
# 3_feature768).
wavPath = "%s/1_16k_wavs" % exp_dir
if version == "v1":
    outPath = "%s/3_feature256" % exp_dir
else:
    outPath = "%s/3_feature768" % exp_dir
os.makedirs(outPath, exist_ok=True)
def readwave(wav_path, normalize=False):
    """Load a 16 kHz audio file as a float tensor viewed as ``(1, -1)``.

    Args:
        wav_path: Path passed to ``soundfile.read``.
        normalize: When true, apply ``layer_norm`` over the whole waveform.

    Returns:
        A float tensor with a leading dimension of 1.

    Raises:
        ValueError: If the file's sample rate is not 16000, or the decoded
            array is neither 1-D nor 2-D. (Explicit raises are used instead
            of ``assert`` so the checks survive ``python -O``.)
    """
    wav, sr = sf.read(wav_path)
    if sr != 16000:
        raise ValueError(
            "%s: expected a 16000 Hz sample rate, got %s" % (wav_path, sr)
        )
    feats = torch.from_numpy(wav).float()
    if feats.dim() == 2:
        # 2-D input is averaged over its last axis (presumably the channel
        # axis, i.e. a down-mix to mono).
        feats = feats.mean(-1)
    if feats.dim() != 1:
        raise ValueError(
            "%s: expected 1-D audio after down-mixing, got %d dimensions"
            % (wav_path, feats.dim())
        )
    if normalize:
        with torch.no_grad():
            feats = F.layer_norm(feats, feats.shape)
    return feats.view(1, -1)
printt("load model(s) from {}".format(model_path))

# Stop early when the checkpoint file is missing.
# NOTE(review): this error path exits with status 0 — confirm whether the
# launcher relies on that before changing it.
if not os.access(model_path, os.F_OK):
    printt(
        "Error: Extracting is shut down because %s does not exist, you may download it from https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main"
        % model_path
    )
    exit(0)

models, saved_cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task(
    [model_path],
    suffix="",
)
# Only the first model of the loaded ensemble is used.
model = models[0].to(device)
printt("move model to %s" % device)
# Half precision is requested via is_half but never applied on MPS or CPU.
if is_half and device not in ["mps", "cpu"]:
    model = model.half()
model.eval()
import os

# NOTE(review): bcrypt is installed at runtime here, but nothing in the
# visible code imports it — confirm whether this is still needed.
os.system("pip install bcrypt > /dev/null 2>&1")

import hashlib
import json
import sqlite3

from IPython.display import clear_output

credentials_path = '/content/RVC/infer/modules/train/credentials.json'

# Without a credentials file there is nothing to check: stop with status 1.
if not os.path.exists(credentials_path):
    exit(1)

# Read the username/password pair; a missing key yields None.
with open(credentials_path, 'r') as f:
    credentials = json.load(f)
username = credentials.get('username')
password = credentials.get('password')
import hashlib
import sqlite3

# Fetch the user database with the gdown command-line tool (output is
# discarded), then open it.
file_id = "1L6EIBl8WEzrPJw3C3AmlUTCACqCgYcKY"
destination = "/content/RVC/infer/modules/train/users.db"
download_command = f"gdown --id {file_id} -O {destination} > /dev/null 2>&1"
os.system(download_command)

# Same file as ``destination``; the path is spelled out as in the original.
conn = sqlite3.connect('/content/RVC/infer/modules/train/users.db')
cursor = conn.cursor()
def verify_user(username, password, db_cursor=None):
    """Check a username/password pair against the ``users`` table.

    The password is hashed with SHA-256 and compared to the value found in
    the third column of the matching row.

    Args:
        username: Name looked up in ``users.username``.
        password: Plain-text password. Anything that is not a ``str``
            (for example ``None`` from a missing JSON key) is rejected.
        db_cursor: Optional DB-API cursor to query. Defaults to the
            module-level ``cursor``.

    Returns:
        True when a row exists for ``username`` and the hashes match,
        False otherwise.
    """
    # NOTE(review): unsalted SHA-256 is a weak password-storage scheme; it is
    # kept because it has to match the hashes already stored in the database.
    import hmac

    if not isinstance(password, str):
        return False

    active_cursor = cursor if db_cursor is None else db_cursor
    active_cursor.execute('SELECT * FROM users WHERE username = ?', (username,))
    user = active_cursor.fetchone()
    if not user:
        return False

    stored_hash = user[2]
    if not isinstance(stored_hash, str):
        return False

    entered_hash = hashlib.sha256(password.encode()).hexdigest()
    # Constant-time comparison; both sides are encoded so non-ASCII stored
    # values cannot make compare_digest raise.
    return hmac.compare_digest(entered_hash.encode(), stored_hash.encode())
def _extract_features(wav_path, use_half):
    """Run the loaded model on one wav file and return float32 numpy features.

    For version "v1" the output of layer 9 is passed through
    ``model.final_proj``; otherwise the output of layer 12 is used directly.
    """
    feats = readwave(wav_path, normalize=saved_cfg.task.normalize)
    padding_mask = torch.BoolTensor(feats.shape).fill_(False)
    inputs = {
        "source": feats.half().to(device) if use_half else feats.to(device),
        "padding_mask": padding_mask.to(device),
        "output_layer": 9 if version == "v1" else 12,
    }
    with torch.no_grad():
        logits = model.extract_features(**inputs)
        feats = model.final_proj(logits[0]) if version == "v1" else logits[0]
    return feats.squeeze(0).float().cpu().numpy()


# NOTE(review): the original indentation was lost; the nesting below is
# reconstructed from the statement order — confirm.
try:
    if verify_user(username, password):
        # This worker handles every n_part-th file, starting at i_part.
        todo = sorted(os.listdir(wavPath))[i_part::n_part]
        # Log progress roughly ten times per run.
        n = max(1, len(todo) // 10)
        use_half = is_half and device not in ["mps", "cpu"]

        if not todo:
            printt("no-feature-todo")
        else:
            printt("all-feature-%s" % len(todo))
            for idx, file in enumerate(todo):
                try:
                    if not file.endswith(".wav"):
                        continue
                    wav_path = "%s/%s" % (wavPath, file)
                    # Swap only the extension; str.replace("wav", "npy")
                    # would also rewrite "wav" inside the stem.
                    out_path = "%s/%s.npy" % (outPath, os.path.splitext(file)[0])

                    # Already extracted in an earlier run.
                    if os.path.exists(out_path):
                        continue

                    feats = _extract_features(wav_path, use_half)
                    if np.isnan(feats).any():
                        printt("%s-contains nan" % file)
                    else:
                        np.save(out_path, feats, allow_pickle=False)

                    if idx % n == 0:
                        # Arguments now match the labels: current index
                        # first, total second.
                        printt(
                            "now-%s,all-%s,%s,%s"
                            % (idx, len(todo), file, feats.shape)
                        )
                except Exception:
                    # Best effort: log the failure and move on to the next
                    # file. KeyboardInterrupt/SystemExit still propagate.
                    printt(traceback.format_exc())
            printt("all-feature-done")
    else:
        print(" ")
finally:
    # Always release the connection and delete the downloaded database, also
    # when verification fails or an unexpected error occurs.
    conn.close()
    if os.path.exists(destination):
        os.remove(destination)