| | """Given a correspondence.csv file from alignment.py, this script will set up (copy and crop)
|
| | the dataset files."""
|
| | import argparse
|
| | import pandas as pd
|
| | import os
|
| | import librosa
|
| | from shutil import copyfile
|
| | import numpy as np
|
| | from pathlib import Path
|
| |
|
| |
|
def clip_and_copy_audio(in_path, out_path, start=None, end=None, padding=0.5):
    """
    Clip the given wav file (if desired), and copy it to a new location.

    Parameters
    ----------
    in_path : string
        Path of the input wav file.

    out_path : string
        Output path for the resulting wav file.

    start : float
        Starting time (in seconds) of the output file. (This many seconds will be clipped
        from the beginning of the input.) None or NaN for no start clipping.

    end : float
        End time (in seconds) for the output file. (The final sample in the output file
        will be the sample at this time in the input file.) None or NaN for no end clipping.

    padding : float
        Silence (in seconds) prepended to the output whenever audio is clipped from the
        beginning (start > 0). That is, instead of shifting the audio at the start time to
        time 0, it is shifted to this time, and the beginning is padded with 0s.
        NOTE(review): the full padding is applied even when padding > start; the value is
        not clamped to start. Confirm the intended behavior before changing it, because
        the timing of the generated files depends on it.

    Notes
    -----
    When neither start nor end is given, the file is copied as-is (and nothing is done if
    in_path and out_path are the same string). Otherwise the audio is decoded with librosa
    and re-written as float32.
    """
    # A missing bound can arrive as None (the documented default) or as NaN (an empty CSV
    # cell). np.isnan(None) raises TypeError, so None has to be tested first.
    has_start = start is not None and not np.isnan(start)
    has_end = end is not None and not np.isnan(end)

    if not has_start and not has_end:
        # Nothing to clip: a plain copy is enough.
        if in_path != out_path:
            copyfile(in_path, out_path)
        return

    offset = start if has_start else 0.0
    # librosa takes a duration rather than an end time; None means "read to the end".
    duration = end - offset if has_end else None
    # sr=None / mono=False ask librosa for the file's native sample rate and all channels.
    data, sr = librosa.core.load(in_path, sr=None, mono=False,
                                 offset=offset,
                                 duration=duration)

    if offset > 0:
        pad_samples = int(sr * padding)
        if data.ndim == 1:
            # Mono: samples are a flat vector.
            data = np.append(np.zeros(pad_samples), data)
        else:
            # Multi-channel: axis 0 is the channel, axis 1 is time, so pad along axis 1.
            silence = np.zeros((data.shape[0], pad_samples))
            data = np.asfortranarray(np.concatenate((silence, data), axis=1))

    # NOTE(review): librosa.output was removed in librosa 0.8, so this call needs an older
    # librosa release; switching writers would add a dependency and is left for a follow-up.
    librosa.output.write_wav(out_path, y=data.astype(np.float32), sr=sr, norm=False)
|
| |
|
| |
|
| |
|
if __name__ == '__main__':
    # Script entry point: for every metadata row that names a maestro audio file, clip that
    # file to the row's start/end times and write it to the row's audio_performance path.
    parser = argparse.ArgumentParser(description='Initialize the performed_midi dataset by '
                                     'cutting maestro audio files and moving them into their correct locations.',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('-m', '--maestro', help='The location of the downloaded maestro data.',
                        default='../maestro-v2.0.0', type=str)

    # type=pd.read_csv hands the path string to pandas, so args.metadata is a DataFrame.
    parser.add_argument('--metadata', help='The correspondence.csv metadata file.',
                        default='metadata.csv', type=pd.read_csv)

    args = parser.parse_args()

    print("Cutting and copying maestro audio performances")

    # Only rows with a maestro audio path are processed. Computing the mask once also gives
    # the real total for the progress message instead of a hard-coded count.
    has_audio = args.metadata["maestro_audio_performance"].notna()
    total = int(has_audio.sum())

    counter = 0
    for idx, row in args.metadata[has_audio].iterrows():
        try:
            # Metadata paths carry a literal '{maestro}' placeholder for the maestro root.
            maestro_path = str(Path(row["maestro_audio_performance"])).replace('{maestro}', str(args.maestro))
            clip_and_copy_audio(str(maestro_path),
                                str(Path(row["audio_performance"])),
                                start=row["start"], end=row["end"])
        except Exception as e:
            # Best effort: report the failing row and keep going with the rest.
            print("Failed for", idx, row["midi_performance"])
            print(e)

        # Counts attempted rows, whether or not the clip succeeded.
        counter += 1
        if counter % 20 == 0:
            print("{}/{} completed".format(counter, total))
|
| |
|