victan committed on
Commit
e102e80
1 Parent(s): 4bd6d90

Upload augment.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. augment.py +78 -0
augment.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import subprocess
4
+
5
+ import librosa
6
+ import numpy as np
7
+ import soundfile as sf
8
+ from tqdm import tqdm
9
+
10
+ from lib import dataset
11
+ from lib import spec_utils
12
+
13
+
14
def soundstretch_command(src, dst, pitch):
    """Build the argv list that runs ``soundstretch`` from *src* to *dst*.

    A list is returned instead of one command string because ``subprocess``
    (without ``shell=True``) treats a plain string as the program name on
    POSIX, so a string command cannot be launched there. A list also keeps
    paths containing spaces intact.

    Args:
        src: Path of the WAV file to read.
        dst: Path of the WAV file to write.
        pitch: Value forwarded verbatim to soundstretch's ``-pitch=`` option.

    Returns:
        The command as a list of strings.
    """
    return ['soundstretch', src, dst, '-pitch={}'.format(pitch)]


def cache_path(cache_dir, audio_path, suffix):
    """Return the cache file path in *cache_dir* for *audio_path*.

    The file name is the base name of *audio_path* with its final extension
    dropped and *suffix* appended.
    """
    stem = os.path.splitext(os.path.basename(audio_path))[0]
    return os.path.join(cache_dir, stem + suffix)


if __name__ == '__main__':
    # Pre-computes pitch-shifted spectrogram caches (.npy) for every
    # mixture/instrumental pair returned by dataset.make_pair.
    p = argparse.ArgumentParser()
    p.add_argument('--sr', '-r', type=int, default=44100)
    p.add_argument('--hop_length', '-l', type=int, default=1024)
    p.add_argument('--n_fft', '-f', type=int, default=2048)
    p.add_argument('--pitch', '-p', type=int, default=-1)
    p.add_argument('--mixtures', '-m', required=True)
    p.add_argument('--instruments', '-i', required=True)
    args = p.parse_args()

    # Scratch WAV files exchanged with soundstretch; they are created in the
    # current working directory and reused for every pair.
    input_i = 'input_i_{}.wav'.format(args.pitch)
    input_v = 'input_v_{}.wav'.format(args.pitch)
    output_i = 'output_i_{}.wav'.format(args.pitch)
    output_v = 'output_v_{}.wav'.format(args.pitch)
    temp_files = (input_i, input_v, output_i, output_v)

    cmd_i = soundstretch_command(input_i, output_i, args.pitch)
    cmd_v = soundstretch_command(input_v, output_v, args.pitch)
    cache_suffix = '_pitch{}.npy'.format(args.pitch)

    # Caches live in a sub-directory named after the analysis parameters,
    # next to the source audio.
    cache_dir = 'sr{}_hl{}_nf{}'.format(args.sr, args.hop_length, args.n_fft)
    mix_cache_dir = os.path.join(args.mixtures, cache_dir)
    inst_cache_dir = os.path.join(args.instruments, cache_dir)
    os.makedirs(mix_cache_dir, exist_ok=True)
    os.makedirs(inst_cache_dir, exist_ok=True)

    filelist = dataset.make_pair(args.mixtures, args.instruments)
    for mix_path, inst_path in tqdm(filelist):
        mix_cache_path = cache_path(mix_cache_dir, mix_path, cache_suffix)
        inst_cache_path = cache_path(inst_cache_dir, inst_path, cache_suffix)

        # Skip pairs whose two caches already exist.
        if os.path.exists(mix_cache_path) and os.path.exists(inst_cache_path):
            continue

        X, _ = librosa.load(
            mix_path, sr=args.sr, mono=False, dtype=np.float32,
            res_type='kaiser_fast')
        y, _ = librosa.load(
            inst_path, sr=args.sr, mono=False, dtype=np.float32,
            res_type='kaiser_fast')

        X, y = spec_utils.align_wave_head_and_tail(X, y, args.sr)
        # Remainder of the mixture once the instrumental is subtracted
        # (presumably the vocals, hence ``v`` -- confirm against the dataset).
        v = X - y

        try:
            # Both stems are shifted separately and summed again below, so
            # the new mixture is built from the shifted stems.
            sf.write(input_i, y.T, args.sr)
            sf.write(input_v, v.T, args.sr)
            # check_call raises on a non-zero exit status, so a failed run
            # can never be followed by loading a stale output file.
            subprocess.check_call(cmd_i, stderr=subprocess.DEVNULL)
            subprocess.check_call(cmd_v, stderr=subprocess.DEVNULL)

            y, _ = librosa.load(
                output_i, sr=args.sr, mono=False, dtype=np.float32,
                res_type='kaiser_fast')
            v, _ = librosa.load(
                output_v, sr=args.sr, mono=False, dtype=np.float32,
                res_type='kaiser_fast')
        finally:
            # Always remove the scratch files, even when a step above failed.
            for path in temp_files:
                if os.path.exists(path):
                    os.remove(path)

        X = y + v

        spec = spec_utils.wave_to_spectrogram(X, args.hop_length, args.n_fft)
        np.save(mix_cache_path, spec)

        spec = spec_utils.wave_to_spectrogram(y, args.hop_length, args.n_fft)
        np.save(inst_cache_path, spec)