Hugo Flores Garcia committed on
Commit
c068a29
1 Parent(s): b3caf82

maestro script

Browse files
Files changed (1) hide show
  1. scripts/utils/maestro-reorg.py +39 -0
scripts/utils/maestro-reorg.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Reorganize the MAESTRO v3.0.0 dataset into per-split symlink trees.

Reads ``maestro-v3.0.0.json`` from ``maestro_path``, groups every audio file
by the split it is assigned to ("train", "validation" or "test"), and creates
a symlink for each file at ``output_path / <split> / <audio_filename>`` that
points back at the original file under ``maestro_path``.
"""
from pathlib import Path
import json
import os

maestro_path = Path("/media/CHONK/hugo/maestro-v3.0.0")
output_path = Path("/media/CHONK/hugo/maestro-v3.0.0-split")

# Splits are processed in this order: train, validation, test.
SPLITS = ("train", "validation", "test")


def group_by_split(maestro):
    """Group audio filenames by their split name.

    Args:
        maestro: Parsed metadata. ``maestro["split"]`` maps an entry key to a
            split name and ``maestro["audio_filename"]`` maps the same key to
            the audio file's path relative to the dataset root.

    Returns:
        A dict with one list of audio filenames per name in ``SPLITS``.
        Splits without entries map to an empty list.

    Raises:
        ValueError: If an entry names a split that is not in ``SPLITS``.
        KeyError: If an entry key has no matching audio filename.
    """
    groups = {name: [] for name in SPLITS}
    for key, split in maestro["split"].items():
        audio_filename = maestro["audio_filename"][key]
        if split not in groups:
            raise ValueError(f"Unknown split {split}")
        groups[split].append(audio_filename)
    return groups


def symlink_split(filenames, source_root, dest_root):
    """Create ``dest_root / name`` -> ``source_root / name`` for each name.

    Parent directories are created as needed. A symlink already present at
    the destination is replaced, so the script can be re-run after a partial
    failure. A regular file at the destination is left alone and still makes
    ``os.symlink`` raise ``FileExistsError``.

    Args:
        filenames: Iterable of paths relative to ``source_root``.
        source_root: Directory containing the original files.
        dest_root: Directory under which the symlinks are created.
    """
    for audio_filename in filenames:
        link = dest_root / audio_filename
        link.parent.mkdir(parents=True, exist_ok=True)
        # is_symlink() is also true for dangling links, which exists() misses.
        if link.is_symlink():
            link.unlink()
        os.symlink(source_root / audio_filename, link)


def main():
    """Load the metadata and build the symlink tree for every split."""
    with open(maestro_path / "maestro-v3.0.0.json", encoding="utf-8") as f:
        maestro = json.load(f)

    groups = group_by_split(maestro)
    for name in SPLITS:
        symlink_split(groups[name], maestro_path, output_path / name)


if __name__ == "__main__":
    main()