# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

"""
For source datasets' standard samples
"""

from collections import defaultdict
import os
import json

SPEECH_DATASETS = ["vctk", "vctksample"]

# Golden test utterances, keyed by dataset name. Each entry is compared with
# the "Uid" field of the utterances listed in the dataset's split JSON file.
GOLDEN_TEST_SAMPLES = defaultdict(list)
GOLDEN_TEST_SAMPLES["m4singer"] = [
    "Alto-1_美错_0014",
    "Bass-1_十年_0008",
    "Soprano-2_同桌的你_0018",
    "Tenor-5_爱笑的眼睛_0010",
]
GOLDEN_TEST_SAMPLES["svcc"] = [
    # IDF1
    "IDF1_10030",
    "IDF1_10120",
    "IDF1_10140",
    # IDM1
    "IDM1_10001",
    "IDM1_10030",
    "IDM1_10120",
    # CDF1
    "CDF1_10030",
    "CDF1_10120",
    "CDF1_10140",
    # CDM1
    "CDM1_10001",
    "CDM1_10030",
    "CDM1_10120",
]
GOLDEN_TEST_SAMPLES["svcceval"] = [
    # SF1
    "SF1_30001",
    "SF1_30002",
    "SF1_30003",
    # SM1
    "SM1_30001",
    "SM1_30002",
    "SM1_30003",
]
GOLDEN_TEST_SAMPLES["popbutfy"] = [
    "Female1#you_are_my_sunshine_Professional#0",
    "Female4#Someone_Like_You_Professional#10",
    "Male2#Lemon_Tree_Professional#12",
    "Male5#can_you_feel_the_love_tonight_Professional#20",
]
GOLDEN_TEST_SAMPLES["opensinger"] = [
    "Man_0_大鱼_10",
    "Man_21_丑八怪_14",
    "Woman_39_mojito_22",
    "Woman_40_易燃易爆炸_12",
]
GOLDEN_TEST_SAMPLES["nus48e"] = [
    "ADIZ_read#01#0000",
    "MCUR_sing#10#0000",
    "JLEE_read#08#0001",
    "SAMF_sing#18#0001",
]
GOLDEN_TEST_SAMPLES["popcs"] = [
    "明天会更好_0004",
    "欧若拉_0005",
    "虫儿飞_0006",
    "隐形的翅膀_0008",
]
GOLDEN_TEST_SAMPLES["kising"] = [
    "421_0040",
    "424_0013",
    "431_0026",
]
GOLDEN_TEST_SAMPLES["csd"] = [
    "en_004a_0001",
    "en_042b_0006",
    "kr_013a_0006",
    "kr_045b_0004",
]
GOLDEN_TEST_SAMPLES["opera"] = [
    "fem_01#neg_1#0000",
    "fem_12#pos_3#0003",
    "male_02#neg_1#0002",
    "male_11#pos_2#0001",
]
GOLDEN_TEST_SAMPLES["lijian"] = [
    "058矜持_0000",
    "079绒花_0000",
    "120遥远的天空底下_0000",
]
# For "cdmusiceval" the entries are matched against the first two
# "_"-separated fields of an utterance's Uid as well as the full Uid.
GOLDEN_TEST_SAMPLES["cdmusiceval"] = ["陶喆_普通朋友", "蔡琴_给电影人的情书"]

# No golden train samples are registered in this file, so train splits always
# take the "first `min_samples` utterances" fallback below.
GOLDEN_TRAIN_SAMPLES = defaultdict(list)


def _load_split_utterances(dataset_dir, split):
    """Load the utterance list stored in ``<dataset_dir>/<split>.json``."""
    utt_file = os.path.join(dataset_dir, "{}.json".format(split))
    # The Uids contain non-ASCII text, so never rely on the locale encoding.
    with open(utt_file, "r", encoding="utf-8") as f:
        return json.load(f)


def get_golden_samples_indexes(
    dataset_name,
    dataset_dir=None,
    cfg=None,
    split=None,
    min_samples=5,
):
    """
    # Get Standard samples' indexes

    Args:
        dataset_name: Key used to look up the golden Uid list.
        dataset_dir: Directory holding ``<split>.json``. When ``None`` it is
            built from ``cfg.OUTPUT_PATH`` and ``cfg.PREPROCESS_VERSION``.
        cfg: Config object; only required when ``dataset_dir`` is ``None``.
        split: Split name. A name containing "test" selects
            ``GOLDEN_TEST_SAMPLES``; otherwise a name containing "train"
            selects ``GOLDEN_TRAIN_SAMPLES``; any other name has no golden
            list.
        min_samples: Number of leading utterances to fall back to when no
            golden utterance is found.

    Returns:
        Sorted list of unique indexes into the split's utterance list. When
        nothing matches, the first ``min_samples`` indexes, clamped to the
        number of available utterances.

    Raises:
        AssertionError: If ``split`` is ``None``, or if both ``dataset_dir``
            and ``cfg`` are ``None``.
    """
    if dataset_dir is None:
        assert cfg is not None, "cfg is required when dataset_dir is None"
        dataset_dir = os.path.join(
            cfg.OUTPUT_PATH,
            "preprocess/{}_version".format(cfg.PREPROCESS_VERSION),
            dataset_name,
        )

    assert split is not None, "split must be provided"
    samples = _load_split_utterances(dataset_dir, split)

    # "test" takes precedence when both substrings are present. `.get` avoids
    # inserting unknown dataset names into the module-level defaultdicts.
    if "test" in split:
        golden_samples = set(GOLDEN_TEST_SAMPLES.get(dataset_name, ()))
    elif "train" in split:
        golden_samples = set(GOLDEN_TRAIN_SAMPLES.get(dataset_name, ()))
    else:
        # Splits such as "valid" have no golden list; use the fallback.
        golden_samples = set()

    match_by_prefix = dataset_name == "cdmusiceval"

    res = []
    for idx, utt in enumerate(samples):
        uid = utt["Uid"]
        matched = uid in golden_samples
        if not matched and match_by_prefix:
            # cdmusiceval golden entries name "<singer>_<song>"; the Uid may
            # carry extra "_"-separated fields after those two.
            matched = "_".join(uid.split("_")[:2]) in golden_samples
        if matched:
            # Appended at most once, even if both rules match.
            res.append(idx)

    if not res:
        # Never hand back indexes beyond the end of the utterance list.
        res = list(range(min(min_samples, len(samples))))

    return res


def get_specific_singer_indexes(dataset_dir, singer_name, split):
    """Return the indexes of the utterances sung by ``singer_name``.

    Args:
        dataset_dir: Directory holding ``<split>.json``.
        singer_name: Value compared with each utterance's "Singer" field.
        split: Name of the split file (without the ".json" extension).

    Returns:
        Non-empty list of indexes into the split's utterance list.

    Raises:
        AssertionError: If no utterance belongs to ``singer_name``.
    """
    samples = _load_split_utterances(dataset_dir, split)

    res = [idx for idx, utt in enumerate(samples) if utt["Singer"] == singer_name]

    assert len(res) != 0, "no utterance found for singer {!r} in split {!r}".format(
        singer_name, split
    )
    return res


def get_uids_and_wav_paths(
    cfg, dataset, dataset_type="train", only_specific_singer=None, return_singers=False
):
    """Collect the Uids and wav paths (optionally singers) of a dataset split.

    Args:
        cfg: Config object providing ``OUTPUT_PATH`` and
            ``PREPROCESS_VERSION``.
        dataset: Dataset name (sub-directory of the preprocess directory).
        dataset_type: Split selector such as "train", "test", "golden_train"
            or "golden_test". The text after the last "_" names the split
            file; a "golden" prefix restricts the result to golden samples.
        only_specific_singer: When not ``None``, keep every utterance of this
            singer in the split. This replaces any golden selection.
        return_singers: Also return the list of singers.

    Returns:
        ``(uids, wav_paths)`` or, when ``return_singers`` is true,
        ``(uids, wav_paths, singers)``.
    """
    dataset_dir = os.path.join(
        cfg.OUTPUT_PATH, "preprocess/{}_version".format(cfg.PREPROCESS_VERSION), dataset
    )
    split = dataset_type.split("_")[-1]
    utterances = _load_split_utterances(dataset_dir, split)

    indexes = range(len(utterances))
    if "golden" in dataset_type:
        # golden_train or golden_test
        indexes = get_golden_samples_indexes(dataset, dataset_dir, split=split)
    if only_specific_singer is not None:
        # Use the bare split name: "golden_test.json" does not exist, the
        # utterances live in "test.json".
        indexes = get_specific_singer_indexes(dataset_dir, only_specific_singer, split)

    uids = [utterances[i]["Uid"] for i in indexes]
    wav_paths = [utterances[i]["Path"] for i in indexes]

    if not return_singers:
        return uids, wav_paths

    singers = [utterances[i]["Singer"] for i in indexes]
    return uids, wav_paths, singers