YourMT3 / amt /src /config /data_presets.py
mimbres's picture
.
a03c9b4
""" data.py:
Data presets for training and evaluation.
Single Presets:
musicnet_mt3
musicnet_em
musicnet_thickstun
slakh
guitarset
...
Multi Presets:
all_mmegs
...
"""
from config.vocabulary import *
from config.vocabulary import drum_vocab_presets, program_vocab_presets
from utils.utils import deduplicate_splits, merge_splits, merge_vocab
data_preset_single_cfg = {
"musicnet_mt3": {
"eval_vocab": [MUSICNET_INSTR_CLASS],
"dataset_name": "musicnet",
"train_split": "train_mt3",
"validation_split": "validation_mt3_acoustic",
"test_split": "test_mt3_acoustic",
"has_stem": False,
},
"musicnet_mt3_synth_only": { # sanity-check
"eval_vocab": [MUSICNET_INSTR_CLASS],
"dataset_name": "musicnet",
"train_split": "train_mt3_synth",
"validation_split": "validation_mt3_synth",
"test_split": "test_mt3_acoustic",
"has_stem": False,
},
"musicnet_mt3_em": {
"eval_vocab": [MUSICNET_INSTR_CLASS],
"dataset_name": "musicnet",
"train_split": "train_mt3_em",
"validation_split": "validation_mt3_em",
"test_split": "test_mt3_em",
"has_stem": False,
},
"musicnet_thickstun": { # exp4
"eval_vocab": [MUSICNET_INSTR_CLASS],
"dataset_name": "musicnet",
"train_split": "train_thickstun",
"validation_split": "test_thickstun",
"test_split": "test_thickstun",
"has_stem": False,
},
"musicnet_thickstun_em": { # NOTE: this is not the use of external 'synth' in the paper, but the use of 'synth' within the dataset
"eval_vocab": [MUSICNET_INSTR_CLASS],
"dataset_name": "musicnet",
"train_split": "train_thickstun_em",
"validation_split": "test_thickstun_em",
"test_split": "test_thickstun_em",
"has_stem": False,
},
"musicnet_thickstun_ext": { # exp4
"eval_vocab": [MUSICNET_INSTR_CLASS],
"dataset_name": "musicnet",
"train_split": "train_thickstun",
"validation_split": "test_thickstun_ext",
"test_split": "test_thickstun_ext",
"has_stem": False,
},
"musicnet_thickstun_ext_em": { # NOTE: this is not the use of external 'synth' in the paper, but the use of 'synth' within the dataset
"eval_vocab": [MUSICNET_INSTR_CLASS],
"dataset_name": "musicnet",
"train_split": "train_thickstun_em",
"validation_split": "test_thickstun_ext_em",
"test_split": "test_thickstun_ext_em",
"has_stem": False,
},
"maps_default": {
"eval_vocab": [PIANO_SOLO_CLASS],
"dataset_name": "maps",
"train_split": "train",
"validation_split": "test",
"test_split": "test",
"has_stem": False,
},
"maps_all": {
"eval_vocab": [None],
"dataset_name": "maps",
"train_split": "all",
"validation_split": None,
"test_split": None,
"has_stem": False,
},
"maestro": {
"eval_vocab": [PIANO_SOLO_CLASS],
"dataset_name": "maestro",
"train_split": "train",
"validation_split": "validation",
"test_split": "test",
"has_stem": False,
},
"maestro_final": {
"eval_vocab": [PIANO_SOLO_CLASS],
"dataset_name": "maestro",
"train_split": merge_splits(["train", "validation"], dataset_name="maestro"),
"validation_split": "test",
"test_split": "test",
"has_stem": False,
},
"guitarset": { # 4 random players for train, 1 for valid, and 1 for test
"eval_vocab": [GUITAR_SOLO_CLASS],
"dataset_name": "guitarset",
"train_split": "train",
"validation_split": "validation",
"test_split": "test",
"has_stem": False,
},
"guitarset_pshift": { # guitarset + pitch shift
"eval_vocab": [GUITAR_SOLO_CLASS],
"dataset_name": "guitarset",
"train_split": "train_pshift",
"validation_split": "validation",
"test_split": "test",
"has_stem": False,
},
"guitarset_progression": { # progression 1 and 2 as train, progression 3 as test
"eval_vocab": [GUITAR_SOLO_CLASS],
"dataset_name": "guitarset",
"train_split": merge_splits(["progression_1", "progression_2"], dataset_name="guitarset"),
"validation_split": "progression_3",
"test_split": "progression_3",
"has_stem": False,
},
"guitarset_progression_pshift": { # guuitarset_progression + pitch shift
"eval_vocab": [GUITAR_SOLO_CLASS],
"dataset_name": "guitarset",
"train_split": merge_splits(["progression_1_pshift", "progression_2_pshift"], dataset_name="guitarset"),
"validation_split": "progression_3",
"test_split": "progression_3",
"has_stem": False,
},
"guitarset_minus_bn": { # guuitarset_style + pitch shift
"eval_vocab": [GUITAR_SOLO_CLASS],
"dataset_name": "guitarset",
"train_split": merge_splits(["Funk_pshift", "SS_pshift", "Jazz_pshift", "Rock_pshift"],
dataset_name="guitarset"),
"validation_split": "BN",
"test_split": "BN",
"has_stem": False,
},
"guitarset_minus_funk": { # guuitarset_style + pitch shift
"eval_vocab": [GUITAR_SOLO_CLASS],
"dataset_name": "guitarset",
"train_split": merge_splits(["BN_pshift", "SS_pshift", "Jazz_pshift", "Rock_pshift"],
dataset_name="guitarset"),
"validation_split": "Funk",
"test_split": "Funk",
"has_stem": False,
},
"guitarset_minus_ss": { # guuitarset_style + pitch shift
"eval_vocab": GUITAR_SOLO_CLASS,
"dataset_name": "guitarset",
"train_split": merge_splits(["BN_pshift", "Funk_pshift", "Jazz_pshift", "Rock_pshift"],
dataset_name="guitarset"),
"validation_split": "SS",
"test_split": "SS",
"has_stem": False,
},
"guitarset_minus_jazz": { # guuitarset_style + pitch shift
"eval_vocab": [GUITAR_SOLO_CLASS],
"dataset_name": "guitarset",
"train_split": merge_splits(["BN_pshift", "Funk_pshift", "SS_pshift", "Rock_pshift"],
dataset_name="guitarset"),
"validation_split": "Jazz",
"test_split": "Jazz",
"has_stem": False,
},
"guitarset_minus_rock": { # guuitarset_style + pitch shift
"eval_vocab": [GUITAR_SOLO_CLASS],
"dataset_name": "guitarset",
"train_split": merge_splits(["BN_pshift", "Funk_pshift", "SS_pshift", "Jazz_pshift"],
dataset_name="guitarset"),
"validation_split": "Rock",
"test_split": "Rock",
"has_stem": False,
},
"guitarset_all": {
"eval_vocab": [None],
"dataset_name": "guitarset",
"train_split": "all",
"validation_split": None,
"test_split": None,
"has_stem": False,
},
"enstdrums_dtp": {
"eval_vocab": [None],
"eval_drum_vocab": drum_vocab_presets["ksh"],
"dataset_name": "enstdrums",
"train_split": merge_splits(["drummer_1_dtp", "drummer_2_dtp", "drummer_1_dtp", "drummer_2_dtp"], dataset_name="enstdrums"),
"validation_split": "drummer_1_dtp", # for sanity check
"test_split": "drummer_3_dtp",
"has_stem": False,
},
"enstdrums_dtm": {
"eval_vocab": [None],
"eval_drum_vocab": drum_vocab_presets["ksh"],
"dataset_name": "enstdrums",
"train_split": merge_splits(["drummer_1_dtm", "drummer_2_dtm", "drummer_1_dtp", "drummer_2_dtp"], dataset_name="enstdrums"),
"validation_split": "drummer_3_dtm_r2", # 0.6 * drum
"test_split": "drummer_3_dtm_r1", # 0.75 * drum
"has_stem": True,
},
"enstdrums_random_dtm": { # single dataset training as a denoising ADT model
"eval_vocab": [None],
"eval_drum_vocab": drum_vocab_presets["ksh"],
"dataset_name": "enstdrums",
"train_split": "train_dtm",
"validation_split": "validation_dtm",
"test_split": "test_dtm",
"has_stem": True,
},
"enstdrums_random": { # multi dataset training with random split of 70:15:15
"eval_vocab": [None],
"eval_drum_vocab": drum_vocab_presets["ksh"],
"dataset_name": "enstdrums",
"train_split": "train_dtp",
"validation_split": "test_dtm",
"test_split": "test_dtm",
"has_stem": True,
},
"enstdrums_random_plus_dtd": { # multi dataset training plus dtd
"eval_vocab": [None],
"eval_drum_vocab": drum_vocab_presets["ksh"],
"dataset_name": "enstdrums",
"train_split": merge_splits(["train_dtp", "all_dtd"], dataset_name="enstdrums"),
"validation_split": "test_dtm",
"test_split": "test_dtm",
"has_stem": True,
},
"mir_st500": {
"eval_vocab": [SINGING_SOLO_CLASS],
"dataset_name": "mir_st500",
"train_split": "train_stem",
"validation_split": "test",
"test_split": "test",
"has_stem": True,
},
"mir_st500_voc": {
"eval_vocab": [SINGING_SOLO_CLASS],
"dataset_name": "mir_st500",
"train_split": "train_vocal",
"validation_split": "test_vocal",
"test_split": "test_vocal",
"has_stem": False,
},
"mir_st500_voc_debug": { # using train_vocal for test (for debugging)
"eval_vocab": [SINGING_SOLO_CLASS],
"dataset_name": "mir_st500",
"train_split": "train_vocal",
"validation_split": "test_vocal",
"test_split": "train_vocal",
"has_stem": False,
},
"slakh": {
"eval_vocab": [GM_INSTR_CLASS],
"eval_drum_vocab": drum_vocab_presets["gm"],
"dataset_name": "slakh",
"train_split": "train",
"validation_split": "validation",
"test_split": "test",
"has_stem": True,
},
"slakh_final": {
"eval_vocab": [GM_INSTR_CLASS],
"eval_drum_vocab": drum_vocab_presets["gm"],
"dataset_name": "slakh",
"train_split": merge_splits(["train", "validation"], dataset_name="slakh"),
"validation_split": "test",
"test_split": "test",
"has_stem": True,
},
"rwc_pop_bass": {
"eval_vocab": [BASS_SOLO_CLASS],
"add_pitch_class_metric": ["Bass"],
"dataset_name": "rwc_pop",
"train_split": None,
"validation_split": "bass",
"test_split": "bass",
"has_stem": False,
},
"rwc_pop_full": {
"eval_vocab": [GM_INSTR_CLASS_PLUS],
"add_pitch_class_metric": list(GM_INSTR_CLASS_PLUS.keys()),
"dataset_name": "rwc_pop",
"train_split": None,
"validation_split": "full",
"test_split": "full",
"has_stem": False,
},
"egmd": {
"eval_vocab": [None],
"eval_drum_vocab": drum_vocab_presets["ksh"],
"dataset_name": "egmd",
"train_split": "train",
"validation_split": "validation",
"test_split": "test_reduced", # EGMD has 5000+ test files, so we reudce it to 200 files to save time
# "train_limit_num_files": 4402, #8804, # 17608, # limit the number of files for training to random choice of half.
"has_stem": False,
},
"urmp": {
"eval_vocab": [GM_INSTR_CLASS],
"dataset_name": "urmp",
"train_split": "train",
"validation_split": "test",
"test_split": "test",
"has_stem": True,
},
"cmedia": {
"eval_vocab": [SINGING_SOLO_CLASS],
"dataset_name": "cmedia",
"train_split": "train_stem",
"validation_split": "train",
"test_split": "train",
"has_stem": True,
},
"cmedia_voc": {
"eval_vocab": [SINGING_SOLO_CLASS],
"dataset_name": "cmedia",
"train_split": "train_vocal",
"validation_split": "train_vocal",
"test_split": "train_vocal",
"has_stem": False,
},
"idmt_smt_bass": {
"eval_vocab": [BASS_SOLO_CLASS],
"dataset_name": "idmt_smt_bass",
"train_split": "train",
"validation_split": "validation",
"test_split": "validation",
"has_stem": False,
},
"geerdes": { # full mix dataset for evaluation
"eval_vocab": [GM_INSTR_CLASS_PLUS],
"dataset_name": "geerdes",
"train_split": None,
"validation_split": None,
"test_split": "all",
"has_stem": False,
},
"geerdes_sep": { # Using vocal/accomp separation for evalutation
"eval_vocab": [GM_INSTR_CLASS_PLUS],
"dataset_name": "geerdes",
"train_split": None,
"validation_split": None,
"test_split": "all_sep",
"has_stem": False,
},
"geerdes_half": { # Using half dataset for train/val
"eval_vocab": [GM_INSTR_CLASS_PLUS],
"dataset_name": "geerdes",
"train_split": "train",
"validation_split": "validation",
"test_split": "validation",
"has_stem": False,
},
"geerdes_half_sep": { # Using half dataset with vocal/accomp separation for train/val
"eval_vocab": [GM_INSTR_CLASS_PLUS],
"dataset_name": "geerdes",
"train_split": "train_sep",
"validation_split": "validation_sep",
"test_split": "validation_sep",
"has_stem": False,
},
}
data_preset_multi_cfg = {
"musicnet_mt3_em_synth_plus_maps": {
"presets": ["musicnet_mt3_em_synth", "maps_all"],
"weights": [0.6, 0.4],
"eval_vocab": [MUSICNET_INSTR_CLASS],
},
"musicnet_em_synth_table2_plus_maps": {
"presets": ["musicnet_em_synth_table2", "maps_all"],
"weights": [0.6, 0.4],
"eval_vocab": [MUSICNET_INSTR_CLASS],
},
"musicnet_em_synth_table2_plus_maps_multi": {
"presets": ["musicnet_em_synth_table2", "maps_default"],
"weights": [0.6, 0.4],
"eval_vocab": [MUSICNET_INSTR_CLASS],
},
"guitarset_progression_plus_maps": {
"presets": ["guitarset_progression", "maps_all"],
"weights": [0.5, 0.5],
"eval_vocab": [GUITAR_SOLO_CLASS],
},
"guitarset_pshift_plus_maps": {
"presets": ["guitarset_pshift", "maps_default"],
"weights": [0.6, 0.4],
"eval_vocab": [merge_vocab([GUITAR_SOLO_CLASS, PIANO_SOLO_CLASS])],
},
"guitarset_pshift_plus_musicnet_thick": {
"presets": ["guitarset_pshift", "musicnet_thickstun_em"],
"weights": [0.5, 0.5],
"eval_vocab": [merge_vocab([GUITAR_SOLO_CLASS, PIANO_SOLO_CLASS])],
},
"multi_sanity_check": {
"presets": ["musicnet_mt3_synth_only", "musicnet_mt3_synth_only"],
"weights": [0.6, 0.4],
"eval_vocab": [MUSICNET_INSTR_CLASS],
},
"all_mmegs": {
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", "guitarset_pshift"
],
"weights": [0.2, 0.2, 0.2, 0.2, 0.2],
"eval_vocab": [None] * 5, # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_gt_cv0": {
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", "guitarset_minus_bn"
],
"weights": [0.2, 0.2, 0.2, 0.2, 0.2],
"eval_vocab": [None] * 5, # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_gt_cv1": {
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp",
"guitarset_minus_funk"
],
"weights": [0.2, 0.2, 0.2, 0.2, 0.2],
"eval_vocab": [None] * 5, # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_gt_cv2": {
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", "guitarset_minus_ss"
],
"weights": [0.2, 0.2, 0.2, 0.2, 0.2],
"eval_vocab": [None] * 5, # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_gt_cv3": {
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp",
"guitarset_minus_rock"
],
"weights": [0.2, 0.2, 0.2, 0.2, 0.2],
"eval_vocab": [None] * 5, # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_gt_cv4": {
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp",
"guitarset_minus_jazz"
],
"weights": [0.2, 0.2, 0.2, 0.2, 0.2],
"eval_vocab": [None] * 5, # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_enstdrums_random": {
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_random", "guitarset"
],
"weights": [0.2, 0.2, 0.2, 0.2, 0.2],
"eval_vocab": [None] * 5, # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_plus_egmd": {
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_random_plus_dtd",
"guitarset", "egmd"
],
"weights": [0.2, 0.2, 0.2, 0.1, 0.1, 0.2],
"eval_vocab": [None] * 6, # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_dtp_egmd": {
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", "guitarset", "egmd"
],
"weights": [0.2, 0.2, 0.2, 0.1, 0.1, 0.2],
"eval_vocab": [None] * 6, # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_weighted_slakh": {
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", "guitarset_pshift", "egmd"
],
"weights": [0.5, 0.1, 0.1, 0.05, 0.05, 0.2],
"eval_vocab": [None] * 6, # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_weighted_mt3": { # for comparison with MT3
"presets": [
"slakh", "musicnet_mt3", "mir_st500_voc", "enstdrums_dtp",
"guitarset_progression_pshift", "egmd"
],
"weights": [0.5, 0.1, 0.1, 0.05, 0.05, 0.2],
"eval_vocab": [None] * 6, # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_weighted_mt3_em": { # musicnet_mt3_em
"presets": [
"slakh", "musicnet_mt3_em", "mir_st500_voc", "enstdrums_dtp",
"guitarset_progression_pshift", "egmd"
],
"weights": [0.5, 0.1, 0.1, 0.05, 0.05, 0.2],
"eval_vocab": [None] * 6, # None means instrument-agnoßstic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_urmp": {
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp",
"guitarset_pshift", "egmd", "urmp"
],
"weights": [0.5, 0.2, 0.1, 0.05, 0.05, 0.05, 0.1],
"eval_vocab": [None] * 7, # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_urmp_mt3": { # for comparison with MT3 including URMP
"presets": [
"slakh", "musicnet_mt3", "mir_st500_voc", "enstdrums_dtp",
"guitarset_progression", "egmd", "urmp"
],
"weights": [0.5, 0.2, 0.1, 0.05, 0.05, 0.0125, 0.1],
"eval_vocab": [None] * 7, # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_urmp_mt3_em": { # musicnet_mt3_em including URMP
"presets": [
"slakh", "musicnet_mt3_em", "mir_st500_voc", "enstdrums_dtp",
"guitarset_progression", "egmd", "urmp"
],
"weights": [0.5, 0.2, 0.1, 0.05, 0.05, 0.0125, 0.1],
"eval_vocab": [None] * 7, # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_maestro": { # including Mestro and URMP
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp",
"guitarset_pshift", "egmd", "urmp", "maestro"
],
"weights": [0.5, 0.1, 0.125, 0.075, 0.025, 0.01, 0.1, 0.1],
"eval_vocab": [None] * 8, # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_maestro_mt3": { # for comparison with MT3 including URMP
"presets": [
"slakh", "musicnet_mt3", "mir_st500_voc", "enstdrums_dtp",
"guitarset_progression", "egmd", "urmp", "maestro"
],
"weights": [0.5, 0.1, 0.1, 0.05, 0.05, 0.0125, 0.1, 0.1],
"eval_vocab": [None] * 8, # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_maestro_mt3_em": { # musicnet_mt3_em including URMP
"presets": [
"slakh", "musicnet_mt3_em", "mir_st500_voc", "enstdrums_dtp",
"guitarset_progression", "egmd", "urmp", "maestro"
],
"weights": [0.5, 0.1, 0.1, 0.05, 0.05, 0.0125, 0.1, 0.1],
"eval_vocab": [None] * 8, # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"singing_v1": { # slakh + mir_st500 without spleeter
"presets": ["slakh", "mir_st500"],
"weights": [0.8, 0.2],
"eval_vocab": [None, SINGING_SOLO_CLASS], # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_singing_v1": { # for singing-only task
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_stem", "enstdrums_dtp",
"guitarset_pshift", "egmd", "urmp", "maestro"
],
"weights": [0.5, 0.1, 0.1, 0.05, 0.05, 0.0125, 0.1, 0.1],
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None], # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_singing_drum_v1": { # for singing-only and drum-only tasks
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_stem", "enstdrums_dtm",
"guitarset_pshift", "egmd", "urmp", "maestro"
],
"weights": [0.5, 0.1, 0.1, 0.05, 0.05, 0.0125, 0.1, 0.1],
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None], # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_cross": { # including Mestro and URMP
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp",
"guitarset_pshift", "egmd", "urmp", "maestro"
],
"weights": [0.5, 0.1, 0.125, 0.075, 0.025, 0.01, 0.1, 0.1],
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None], # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_cross_rebal": { # rebalanced for cross-augment, using spleeter
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp",
"guitarset_pshift", "egmd", "urmp", "maestro"
],
"weights": [0.4, 0.15, 0.15, 0.075, 0.025, 0.01, 0.1, 0.1],
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None], # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_cross_rebal2": { # rebalanced for cross-augment, using spleeter
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp",
"guitarset_pshift", "egmd", "urmp", "maestro"
],
"weights": [0.275, 0.19, 0.19, 0.1, 0.025, 0.02, 0.1, 0.1],
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None], # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_cross_rebal4": { # rebalanced for cross-augment, using spleeter
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp",
"guitarset_pshift", "egmd", "urmp", "maestro"
],
"weights": [0.258, 0.19, 0.2, 0.125, 0.022, 0.005, 0.1, 0.1],
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None], # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_cross_rebal5": { # rebalanced for cross-augment, using spleeter
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp",
"guitarset_pshift", "egmd", "urmp", "maestro"
],
"weights": [0.295, 0.19, 0.24, 0.05, 0.02, 0.005, 0.1, 0.1],
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None], # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_cross_stem": { # accomp stem for sub-task learning + rebalanced for cross-augment
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_stem", "enstdrums_dtm",
"guitarset_pshift", "egmd", "urmp", "maestro"
],
"weights": [0.4, 0.15, 0.15, 0.075, 0.025, 0.01, 0.1, 0.1],
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None], # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_cross_stem_rebal3": { # accomp stem for sub-task learning + rebalanced for cross-augment
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_stem", "enstdrums_dtm",
"guitarset_pshift", "egmd", "urmp", "maestro"
],
"weights": [0.265, 0.18, 0.21, 0.1, 0.025, 0.02, 0.1, 0.1],
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None], # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_cross_v6": { # +cmeida +idmt_smt_bass
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp",
"guitarset", "egmd", "urmp", "maestro", "idmt_smt_bass", "cmedia_voc",
],
"weights": [0.295, 0.19, 0.19, 0.05, 0.01, 0.005, 0.1, 0.1, 0.01, 0.05],
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None, BASS_SOLO_CLASS, SINGING_SOLO_CLASS], # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_cross_v6_geerdes": { # +geerdes_half
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp",
"guitarset", "egmd", "urmp", "maestro", "idmt_smt_bass", "cmedia_voc",
"geerdes_half", "geerdes_half_sep"
],
"weights": [0.295, 0.19, 0.19, 0.05, 0.01, 0.005, 0.075, 0.075, 0.01, 0.05, 0.025, 0.025],
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None, BASS_SOLO_CLASS,
SINGING_SOLO_CLASS, GM_INSTR_CLASS_PLUS, GM_INSTR_CLASS_PLUS], # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_cross_v6_geerdes_rebal": { # +geerdes_half
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp",
"guitarset", "egmd", "urmp", "maestro", "idmt_smt_bass", "cmedia_voc",
"geerdes_half", "geerdes_half_sep"
],
"weights": [0.245, 0.175, 0.19, 0.05, 0.01, 0.005, 0.075, 0.05, 0.01, 0.05, 0.075, 0.075],
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None, BASS_SOLO_CLASS,
SINGING_SOLO_CLASS, GM_INSTR_EXT_CLASS_PLUS, GM_INSTR_EXT_CLASS_PLUS], # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_cross_v7": {
"presets": [
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp",
"guitarset_progression_pshift", "egmd", "urmp", "maestro", "idmt_smt_bass", "cmedia_voc",
],
"weights": [0.295, 0.19, 0.191, 0.05, 0.01, 0.004, 0.1, 0.1, 0.01, 0.05],
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None, BASS_SOLO_CLASS, SINGING_SOLO_CLASS], # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_cross_final": {
"presets": [
"slakh_final", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp",
"guitarset_progression_pshift", "egmd", "urmp", "maestro_final", "idmt_smt_bass", "cmedia_voc",
],
"weights": [0.295, 0.19, 0.191, 0.05, 0.01, 0.004, 0.1, 0.1, 0.01, 0.05],
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None, BASS_SOLO_CLASS, SINGING_SOLO_CLASS], # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"all_eval_final": { # The final evaluation set
"presets": [
"slakh", "musicnet_thickstun", "musicnet_thickstun_em", "musicnet_thickstun_ext",
"musicnet_thickstun_ext_em", "mir_st500_voc", "mir_st500", "enstdrums_dtp",
"enstdrums_dtm", "guitarset_progression_pshift", "rwc_pop_bass", "maestro", "urmp",
"maps_default", "rwc_pop_full", # "geerdes", "geerdes_sep",
],
"eval_vocab": [
GM_INSTR_CLASS, MUSICNET_INSTR_CLASS, MUSICNET_INSTR_CLASS, MUSICNET_INSTR_CLASS,
MUSICNET_INSTR_CLASS, SINGING_SOLO_CLASS, SINGING_SOLO_CLASS, None,
None, None, BASS_SOLO_CLASS, PIANO_SOLO_CLASS, GM_INSTR_CLASS,
PIANO_SOLO_CLASS, GM_INSTR_CLASS_PLUS, # GM_INSTR_CLASS_PLUS, GM_INSTR_CLASS_PLUS
],
"eval_drum_vocab": drum_vocab_presets["ksh"],
},
"geerdes_eval": { # Geerdes evaluation sets for models trained without Geerdes.
"presets": ["geerdes_sep", "geerdes"],
"eval_vocab": [GM_INSTR_CLASS_PLUS, GM_INSTR_CLASS_PLUS],
"eval_drum_vocab": drum_vocab_presets["gm"],
},
"geerdes_half_eval": { # Geerdes evaluation sets for models trained with Geerdes-half
"presets": ["geerdes_half_sep", "geerdes_half"],
"eval_vocab": [GM_INSTR_CLASS_PLUS, GM_INSTR_CLASS_PLUS],
"eval_drum_vocab": drum_vocab_presets["gm"],
},
"minimal": { # slakh + mir_st500 with spleeter
"presets": ["slakh", "mir_st500_voc"],
"weights": [0.8, 0.2],
"eval_vocab": [None, SINGING_SOLO_CLASS], # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
"singing_debug": { # slakh + mir_st500 with spleeter
"presets": ["mir_st500_voc_debug"],
"weights": [1.0],
"eval_vocab": [SINGING_SOLO_CLASS], # None means instrument-agnostic F1 for each dataset
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric
"val_max_num_files": 20, # max 20 files per dataset
"test_max_num_files": None,
},
}