Hugo Flores Garcia committed on
Commit
cf172ac
1 Parent(s): 09b9691

update splits, reqs

Browse files
Files changed (2) hide show
  1. scripts/utils/split.py +17 -4
  2. setup.py +2 -1
scripts/utils/split.py CHANGED
@@ -1,8 +1,12 @@
1
  from pathlib import Path
2
  import random
3
  import shutil
 
 
4
 
5
  import argbind
 
 
6
 
7
  from audiotools.core import util
8
 
@@ -12,8 +16,13 @@ def train_test_split(
12
  audio_folder: str = ".",
13
  test_size: float = 0.2,
14
  seed: int = 42,
 
15
  ):
16
- audio_files = util.find_audio(audio_folder)
 
 
 
 
17
 
18
  # split according to test_size
19
  n_test = int(len(audio_files) * test_size)
@@ -37,10 +46,14 @@ def train_test_split(
37
  for split, files in (
38
  ("train", train_files), ("test", test_files)
39
  ):
40
- for file in files:
41
- out_file = Path(file).parent / split / Path(file).name
42
  out_file.parent.mkdir(exist_ok=True, parents=True)
43
- shutil.copy(file, out_file)
 
 
 
 
44
 
45
 
46
 
 
1
  from pathlib import Path
2
  import random
3
  import shutil
4
+ import os
5
+ import json
6
 
7
  import argbind
8
+ from tqdm import tqdm
9
+ from tqdm.contrib.concurrent import thread_map
10
 
11
  from audiotools.core import util
12
 
 
16
  audio_folder: str = ".",
17
  test_size: float = 0.2,
18
  seed: int = 42,
19
+ pattern: str = "**/*.mp3",
20
  ):
21
+ print(f"finding audio")
22
+
23
+ audio_folder = Path(audio_folder)
24
+ audio_files = list(tqdm(audio_folder.glob(pattern)))
25
+ print(f"found {len(audio_files)} audio files")
26
 
27
  # split according to test_size
28
  n_test = int(len(audio_files) * test_size)
 
46
  for split, files in (
47
  ("train", train_files), ("test", test_files)
48
  ):
49
+ for file in tqdm(files):
50
+ out_file = audio_folder.parent / f"{audio_folder.name}-{split}" / Path(file).name
51
  out_file.parent.mkdir(exist_ok=True, parents=True)
52
+ os.symlink(file, out_file)
53
+
54
+ # save split as json
55
+ with open(Path(audio_folder) / f"{split}.json", "w") as f:
56
+ json.dump([str(f) for f in files], f)
57
 
58
 
59
 
setup.py CHANGED
@@ -39,6 +39,7 @@ setup(
39
  "google-cloud-logging==2.2.0",
40
  "einops",
41
  # "frechet_audio_distance",
42
- "gradio"
 
43
  ],
44
  )
 
39
  "google-cloud-logging==2.2.0",
40
  "einops",
41
  # "frechet_audio_distance",
42
+ "gradio",
43
+ "tensorboardX",
44
  ],
45
  )