# beat_tracking / stack_predictions.py
# Giovanni Inchiostro
# Initial Hugging Face deployment
# 3ab6186
import parameters as params
import numpy as np
import tensorflow as tf
def get_copies(song, mel_sr=params.MEL_SAMPLING_RATE):
    """Cut ``song`` into overlapping windows, each with a leading batch axis.

    Consecutive windows start ``2 * mel_sr`` rows apart and each one asks for
    ``params.NUM_SECONDS * mel_sr`` rows; all columns of ``song`` are kept.

    Args:
        song: Array indexed as ``song[row, column]``. Presumably the rows are
            mel-spectrogram frames -- TODO confirm against the caller.
        mel_sr: Number of rows that make up one second.

    Returns:
        A list with one array per window, each built as
        ``np.array([window])`` so it carries a leading axis of size 1. The
        list is empty when ``song`` holds fewer than five whole 2-second
        steps.
    """
    hop = 2 * mel_sr
    # NOTE(review): the "- 4" looks tied to a 10-second window
    # (NUM_SECONDS / 2 - 1) so the last window ends inside the song --
    # confirm against params.NUM_SECONDS before changing either value.
    num_windows = song.shape[0] // hop - 4
    return [
        np.array([song[start:start + params.NUM_SECONDS * mel_sr, :]])
        for start in range(0, num_windows * hop, hop)
    ]
def get_results(len_song, results, mel_sr=params.MEL_SAMPLING_RATE):
    """Zero-pad every window's predictions out to the full song length.

    Entry ``i`` of ``results`` is placed at column offset ``i * 2 * mel_sr``
    and surrounded by zeros so that each returned row has ``len_song``
    columns.

    Args:
        len_song: Number of columns every padded row must have.
        results: Sequence of arrays shaped ``(1, n)``; they are joined with
            ``(1, k)`` blocks of zeros along axis 1.
        mel_sr: Number of positions per second; the window hop is
            ``2 * mel_sr``.

    Returns:
        A list of arrays shaped ``(1, len_song)``, one per entry of
        ``results`` and in the same order.

    Raises:
        ValueError: If an entry would extend past ``len_song`` (the trailing
            padding width becomes negative).
    """
    hop = 2 * mel_sr
    padded = []
    for index, predictions in enumerate(results):
        lead_width = index * hop
        leading = np.zeros((1, lead_width))
        trailing = np.zeros((1, len_song - lead_width - predictions.shape[1]))
        padded.append(np.concatenate((leading, predictions, trailing), axis=1))
    return padded
def get_mean(stacked_results, mel_sr=params.MEL_SAMPLING_RATE):
    """Return the element-wise sum of the arrays in ``stacked_results``.

    Despite the name, no division happens here: the result is a plain sum and
    the caller is responsible for normalising it.

    Args:
        stacked_results: Non-empty sequence of arrays that can be added
            together element-wise.
        mel_sr: Unused; kept so existing call sites keep working.

    Returns:
        A new array holding the sum. The arrays in ``stacked_results`` are
        left unmodified.

    Raises:
        IndexError: If ``stacked_results`` is empty.
    """
    # Copy the first array before accumulating: adding in place directly on
    # stacked_results[0] would silently overwrite the caller's data.
    total = np.array(stacked_results[0], copy=True)
    for addend in stacked_results[1:]:
        total += addend
    return total
def mean_predictions(song, model, mel_sr=params.MEL_SAMPLING_RATE):
    """Run ``model`` over overlapping windows of ``song`` and average overlaps.

    The song is split with ``get_copies``, ``model`` is called on each window,
    the outputs are zero-padded to song length with ``get_results`` and summed
    with ``get_mean``. The sum is then divided by the number of windows that
    cover each position.

    Args:
        song: Array whose first axis has the song length; passed to
            ``get_copies``.
        model: Callable applied to each window. Its output must expose
            ``.shape[1]`` and be accepted by ``np.concatenate``.
        mel_sr: Number of positions per second, forwarded to the helpers.

    Returns:
        A ``float32`` tensor of shape ``(1, song.shape[0])``: the summed row
        is one-dimensional, but dividing it by the ``(1, n)`` coverage array
        broadcasts the result back to two dimensions.

    Raises:
        IndexError: If ``get_copies`` yields no windows, because ``get_mean``
            indexes the first element of an empty list.
    """
    song_len = song.shape[0]
    predictions = [model(window) for window in get_copies(song, mel_sr)]

    summed = get_mean(get_results(song_len, predictions, mel_sr), mel_sr)

    # Count how many windows cover each position by sending a row of ones per
    # window through the same padding and summing steps as the predictions.
    ones_per_window = [
        np.ones((1, predictions[0].shape[1])) for _ in predictions
    ]
    coverage = get_mean(get_results(song_len, ones_per_window, mel_sr), mel_sr)

    # Clamp to one so positions that no window covers divide by 1, not by 0.
    divisor = np.maximum(coverage, np.ones(coverage.shape))
    return tf.convert_to_tensor(summed[0] / divisor, dtype='float32')