victan
/

audio_seam

Model card Files Files and versions Community

victan committed on Dec 19, 2023

Commit

e102e80

•

1 Parent(s): 4bd6d90

Upload augment.py with huggingface_hub

Browse files

Files changed (1) hide show

augment.py +78 -0

augment.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import argparse
+import os
+import subprocess
+import librosa
+import numpy as np
+import soundfile as sf
+from tqdm import tqdm
+from lib import dataset
+from lib import spec_utils
+if __name__ == '__main__':
+    p = argparse.ArgumentParser()
+    p.add_argument('--sr', '-r', type=int, default=44100)
+    p.add_argument('--hop_length', '-l', type=int, default=1024)
+    p.add_argument('--n_fft', '-f', type=int, default=2048)
+    p.add_argument('--pitch', '-p', type=int, default=-1)
+    p.add_argument('--mixtures', '-m', required=True)
+    p.add_argument('--instruments', '-i', required=True)
+    args = p.parse_args()
+    input_i = 'input_i_{}.wav'.format(args.pitch)
+    input_v = 'input_v_{}.wav'.format(args.pitch)
+    output_i = 'output_i_{}.wav'.format(args.pitch)
+    output_v = 'output_v_{}.wav'.format(args.pitch)
+    cmd_i = 'soundstretch {} {} -pitch={}'.format(input_i, output_i, args.pitch)
+    cmd_v = 'soundstretch {} {} -pitch={}'.format(input_v, output_v, args.pitch)
+    cache_suffix = '_pitch{}.npy'.format(args.pitch)
+    cache_dir = 'sr{}_hl{}_nf{}'.format(args.sr, args. hop_length, args.n_fft)
+    mix_cache_dir = os.path.join(args.mixtures, cache_dir)
+    inst_cache_dir = os.path.join(args.instruments, cache_dir)
+    os.makedirs(mix_cache_dir, exist_ok=True)
+    os.makedirs(inst_cache_dir, exist_ok=True)
+    filelist = dataset.make_pair(args.mixtures, args.instruments)
+    for mix_path, inst_path in tqdm(filelist):
+        mix_basename = os.path.splitext(os.path.basename(mix_path))[0]
+        mix_cache_path = os.path.join(mix_cache_dir, mix_basename + cache_suffix)
+        inst_basename = os.path.splitext(os.path.basename(inst_path))[0]
+        inst_cache_path = os.path.join(inst_cache_dir, inst_basename + cache_suffix)
+        if os.path.exists(mix_cache_path) and os.path.exists(inst_cache_path):
+            continue
+        X, _ = librosa.load(
+            mix_path, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
+        y, _ = librosa.load(
+            inst_path, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
+        X, y = spec_utils.align_wave_head_and_tail(X, y, args.sr)
+        v = X - y
+        sf.write(input_i, y.T, args.sr)
+        sf.write(input_v, v.T, args.sr)
+        subprocess.call(cmd_i, stderr=subprocess.DEVNULL)
+        subprocess.call(cmd_v, stderr=subprocess.DEVNULL)
+        y, _ = librosa.load(
+            output_i, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
+        v, _ = librosa.load(
+            output_v, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
+        X = y + v
+        spec = spec_utils.wave_to_spectrogram(X, args.hop_length, args.n_fft)
+        np.save(mix_cache_path, spec)
+        spec = spec_utils.wave_to_spectrogram(y, args.hop_length, args.n_fft)
+        np.save(inst_cache_path, spec)
+        os.remove(input_i)
+        os.remove(input_v)
+        os.remove(output_i)
+        os.remove(output_v)