File size: 1,208 Bytes
c91e8cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from pathlib import Path
import json
import os

maestro_path = Path("/media/CHONK/hugo/maestro-v3.0.0")
output_path = Path("/media/CHONK/hugo/maestro-v3.0.0-split")

# Splits recognised in the metadata, in the order their links are created.
SPLIT_NAMES = ("train", "validation", "test")


def group_by_split(metadata):
    """Group audio filenames by the split each record is assigned to.

    Args:
        metadata: Parsed metadata mapping holding a ``"split"`` table and an
            ``"audio_filename"`` table, both indexed by the same record keys.

    Returns:
        A dict with the keys ``"train"``, ``"validation"`` and ``"test"``,
        each mapping to the list of audio filenames of that split, in the
        iteration order of ``metadata["split"]``.

    Raises:
        ValueError: If a record carries a split name other than the three
            listed above.
    """
    groups = {name: [] for name in SPLIT_NAMES}
    for key, split in metadata["split"].items():
        audio_filename = metadata["audio_filename"][key]
        if split not in groups:
            raise ValueError(f"Unknown split {split}")
        groups[split].append(audio_filename)
    return groups


def link_files(source_root, target_root, audio_filenames):
    """Create one symlink under *target_root* per entry of *audio_filenames*.

    Each link is placed at ``target_root / audio_filename`` and points to
    ``source_root / audio_filename``; missing parent directories of the link
    are created.

    A symlink already present at the link location is replaced, so running
    the script again does not fail. Anything else occupying that location
    (e.g. a regular file) is left alone and ``os.symlink`` raises
    ``FileExistsError`` for it.
    """
    for audio_filename in audio_filenames:
        link = target_root / audio_filename
        link.parent.mkdir(parents=True, exist_ok=True)
        if link.is_symlink():
            # Left over from a previous (possibly interrupted) run.
            link.unlink()
        os.symlink(source_root / audio_filename, link)


def main(source_root=maestro_path, target_root=output_path):
    """Mirror the dataset into per-split directories made of symlinks.

    Reads ``maestro-v3.0.0.json`` from *source_root*, groups the audio files
    by split, and symlinks every file into ``target_root / <split>``.

    Args:
        source_root: Directory containing the metadata JSON and the audio
            files it references. Defaults to ``maestro_path``.
        target_root: Directory under which the ``train``, ``validation`` and
            ``test`` trees are created. Defaults to ``output_path``.

    Raises:
        ValueError: If the metadata contains an unknown split name; raised
            before any link is created.
    """
    with open(source_root / "maestro-v3.0.0.json", encoding="utf-8") as f:
        metadata = json.load(f)

    for split_name, audio_filenames in group_by_split(metadata).items():
        link_files(source_root, target_root / split_name, audio_filenames)


if __name__ == "__main__":
    main()