Christian J. Steinmetz committed on
Commit
9f855cb
1 Parent(s): 732603c

support for 5 datasets

Browse files
Files changed (1) hide show
  1. remfx/datasets.py +91 -10
remfx/datasets.py CHANGED
@@ -19,9 +19,10 @@ from remfx.utils import create_sequential_chunks
19
  # https://zenodo.org/record/1193957 -> VocalSet
20
 
21
  ALL_EFFECTS = effects.Pedalboard_Effects
 
22
 
23
 
24
- singer_splits = {
25
  "train": [
26
  "male1",
27
  "male2",
@@ -44,6 +45,94 @@ singer_splits = {
44
  "test": ["male11", "female9"],
45
  }
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  class VocalSet(Dataset):
49
  def __init__(
@@ -82,15 +171,7 @@ class VocalSet(Dataset):
82
  self.effects_to_keep = self.validate_effect_input()
83
  self.proc_root = self.render_root / "processed" / effects_string / self.mode
84
 
85
- # find all singer directories
86
- singer_dirs = glob.glob(os.path.join(self.root, "data_by_singer", "*"))
87
- singer_dirs = [
88
- sd for sd in singer_dirs if os.path.basename(sd) in singer_splits[mode]
89
- ]
90
- self.files = []
91
- for singer_dir in singer_dirs:
92
- self.files += glob.glob(os.path.join(singer_dir, "**", "**", "*.wav"))
93
- self.files = sorted(self.files)
94
 
95
  if self.proc_root.exists() and len(list(self.proc_root.iterdir())) > 0:
96
  print("Found processed files.")
 
19
  # https://zenodo.org/record/1193957 -> VocalSet
20
 
21
  ALL_EFFECTS = effects.Pedalboard_Effects
22
+ print(ALL_EFFECTS)
23
 
24
 
25
+ vocalset_splits = {
26
  "train": [
27
  "male1",
28
  "male2",
 
45
  "test": ["male11", "female9"],
46
  }
47
 
48
# Per-dataset partitions. Each table maps a split name ("train" / "val" / "test")
# to the labels that select the files belonging to that split. How a label is
# matched against a file (leading filename token, whole-label prefix, or parent
# directory name) is dataset specific -- see locate_files().
guitarset_splits = {"train": ["00", "01", "02", "03"], "val": ["04"], "test": ["05"]}
idmt_guitar_splits = {
    "train": ["classical", "country_folk", "jazz", "latin", "metal", "pop"],
    "val": ["reggae", "ska"],
    "test": ["rock", "blues"],
}
idmt_bass_splits = {
    "train": ["BE", "BEQ"],
    "val": ["VIF"],
    "test": ["VIS"],
}
idmt_drums_splits = {
    "train": ["WaveDrum02", "TechnoDrum01"],
    "val": ["RealDrum01"],
    "test": ["TechnoDrum02", "WaveDrum01"],
}


def locate_files(root: str, mode: str):
    """Collect the .wav files of every supported dataset found under ``root``.

    Each dataset is looked up in a fixed sub-directory of ``root`` and is
    skipped silently when that directory does not exist. For every dataset
    that is present, the files selected by its split table for ``mode`` are
    sorted and appended to the result, so the returned list is grouped by
    dataset in this order: VocalSet, GuitarSet, IDMT-SMT-Guitar,
    IDMT-SMT-Bass, IDMT-SMT-Drums.

    Args:
        root: Directory that contains the dataset folders.
        mode: Split name used as key into the split tables
            ("train", "val" or "test").

    Returns:
        List of file paths (strings). Empty when no dataset directory exists.

    Raises:
        KeyError: If ``mode`` is not a key of the split table of a dataset
            that is present under ``root``.
    """
    file_list = []

    # ------------------------- VocalSet -------------------------
    vocalset_dir = os.path.join(root, "VocalSet1-2")
    if os.path.isdir(vocalset_dir):
        # keep only the singer directories assigned to this split
        singer_dirs = [
            sd
            for sd in glob.glob(os.path.join(vocalset_dir, "data_by_singer", "*"))
            if os.path.basename(sd) in vocalset_splits[mode]
        ]
        files = []
        for singer_dir in singer_dirs:
            # non-recursive glob: each "**" matches exactly one directory level,
            # so only files two levels below the singer directory are found
            files += glob.glob(os.path.join(singer_dir, "**", "**", "*.wav"))
        print(f"Found {len(files)} files in VocalSet {mode}.")
        file_list += sorted(files)

    # ------------------------- GuitarSet -------------------------
    guitarset_dir = os.path.join(root, "audio_mono-mic")
    if os.path.isdir(guitarset_dir):
        # the token before the first "_" of the filename selects the split
        files = [
            f
            for f in glob.glob(os.path.join(guitarset_dir, "*.wav"))
            if os.path.basename(f).split("_")[0] in guitarset_splits[mode]
        ]
        print(f"Found {len(files)} files in GuitarSet {mode}.")
        file_list += sorted(files)

    # ------------------------- IDMT-SMT-GUITAR -------------------------
    idmt_smt_guitar_dir = os.path.join(root, "IDMT-SMT-GUITAR_V2")
    if os.path.isdir(idmt_smt_guitar_dir):
        files = glob.glob(
            os.path.join(
                idmt_smt_guitar_dir, "IDMT-SMT-GUITAR_V2", "dataset4", "**", "*.wav"
            ),
            recursive=True,
        )
        # Match the whole label followed by "_" instead of only the first
        # "_"-separated token: a label such as "country_folk" contains an
        # underscore itself and could never equal a single token.
        label_prefixes = tuple(f"{label}_" for label in idmt_guitar_splits[mode])
        files = [f for f in files if os.path.basename(f).startswith(label_prefixes)]
        file_list += sorted(files)
        print(f"Found {len(files)} files in IDMT-SMT-Guitar {mode}.")

    # ------------------------- IDMT-SMT-BASS -------------------------
    idmt_smt_bass_dir = os.path.join(root, "IDMT-SMT-BASS")
    if os.path.isdir(idmt_smt_bass_dir):
        files = glob.glob(
            os.path.join(idmt_smt_bass_dir, "**", "*.wav"),
            recursive=True,
        )
        # here the name of the directory holding the file selects the split
        files = [
            f
            for f in files
            if os.path.basename(os.path.dirname(f)) in idmt_bass_splits[mode]
        ]
        file_list += sorted(files)
        print(f"Found {len(files)} files in IDMT-SMT-Bass {mode}.")

    # ------------------------- IDMT-SMT-DRUMS -------------------------
    idmt_smt_drums_dir = os.path.join(root, "IDMT-SMT-DRUMS-V2")
    if os.path.isdir(idmt_smt_drums_dir):
        # the token before the first "_" of the filename selects the split
        files = [
            f
            for f in glob.glob(os.path.join(idmt_smt_drums_dir, "audio", "*.wav"))
            if os.path.basename(f).split("_")[0] in idmt_drums_splits[mode]
        ]
        file_list += sorted(files)
        print(f"Found {len(files)} files in IDMT-SMT-Drums {mode}.")

    return file_list
135
+
136
 
137
  class VocalSet(Dataset):
138
  def __init__(
 
171
  self.effects_to_keep = self.validate_effect_input()
172
  self.proc_root = self.render_root / "processed" / effects_string / self.mode
173
 
174
+ self.files = locate_files(self.root, self.mode)
 
 
 
 
 
 
 
 
175
 
176
  if self.proc_root.exists() and len(list(self.proc_root.iterdir())) > 0:
177
  print("Found processed files.")