import numpy as np
from pathlib import Path
import os.path
import sys
import argparse
'''
Fit feature transforms that can be updated sequentially, i.e. transforms that
implement scikit-learn's `partial_fit` method, and pickle them next to the data.
'''
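# make the repository root importable so sibling modules can be resolved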
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.abspath(os.path.join(THIS_DIR, os.pardir))
sys.path.append(ROOT_DIR)
parser = argparse.ArgumentParser(description="Preprocess songs data")
parser.add_argument("data_path", type=str, help="Directory contining Beat Saber level folders")
parser.add_argument("--feature_name", metavar='', type=str, default="mel", help="mel, chroma, multi_mel")
parser.add_argument("--transforms", metavar='', type=str, default="scaler", help="comma-separated lists of transforms to extract (scaler,pca_transform)")
args = parser.parse_args()
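# Example invocation (script name and data path are illustrative):
#   python sequential_transforms.py data/extracted_features --feature_name mel --transforms scaler,pca_transform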
# promote the parsed arguments to global variables of the same name, used below
globals().update(vars(args))
data_path = Path(data_path)
## distributing tasks across nodes ##
from mpi4py import MPI
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
print(f"MPI rank {rank} of {size}")
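# partial_fit must see every file sequentially within one process, so this
# script refuses to run with more than a single MPI rank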
assert size == 1
# find all saved feature files; sort for a deterministic, reproducible order
candidate_files = sorted(data_path.glob('**/*' + feature_name + '.npy'), key=lambda path: str(path.parent))
tasks = range(len(candidate_files))
from sklearn import decomposition, preprocessing
import pickle
transforms = transforms.split(",")
transforms_dict = {}
for transform in transforms:
    if transform == "scaler":
        transforms_dict["scaler"] = preprocessing.StandardScaler()
    elif transform == "pca_transform":
        # peek at the first feature file to size the PCA; use the last axis so
        # 3D (multi-channel) features are measured consistently with the
        # channel slicing performed in the fitting loop below
        features = np.load(str(candidate_files[0]))
        feature_size = features.shape[-1]
        # PCA has no partial_fit; IncrementalPCA is its sequential counterpart
        # (note: each partial_fit batch must contain at least n_components samples)
        transforms_dict["pca_transform"] = decomposition.IncrementalPCA(n_components=feature_size)
    else:
        raise NotImplementedError("Transform type " + transform + " not implemented")
for i in tasks:
    path = candidate_files[i]
    features = np.load(str(path))
    # multi-channel features (e.g. multi_mel) are stored as 3D arrays;
    # keep only the first channel so every transform sees 2D input
    if len(features.shape) == 3:
        features = features[:, 0, :]
    for transform in transforms:
        transforms_dict[transform].partial_fit(features)

# save the fitted transforms next to the data
for transform in transforms:
    with open(data_path.joinpath(feature_name + '_' + transform + '.pkl'), 'wb') as f:
        pickle.dump(transforms_dict[transform], f)
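
# A minimal sketch of how the pickled transforms might be applied later
# (hypothetical file names; assumes the script was run with --transforms scaler):
#
#   import pickle
#   import numpy as np
#   with open('data/mel_scaler.pkl', 'rb') as f:
#       scaler = pickle.load(f)
#   normalized = scaler.transform(np.load('some_song_mel.npy'))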