from scipy.ndimage import median_filter
import json
import numpy as np
from pathlib import Path
# Band edges, in the same unit as BIN_FREQS (presumably Hz -- TODO confirm).
# process() treats bins below LOW as "low", bins above HIGH as "high" and
# everything in between (inclusive) as "mid".
LOW = 250
HIGH = 4000

# Frame rate of the spectrograms: process() divides the frame count by FPS to
# obtain the duration and also uses it as the median-filter window length.
FPS = 100

# Frequency associated with each spectrogram bin (one entry per index of the
# last spectrogram axis).  The literal values must be bound to a name before
# the conversion below; without the assignment the module fails on import
# with a NameError.
BIN_FREQS = [
    43.06640625, 64.599609375, 86.1328125, 107.666015625, 129.19921875, 150.732421875, 172.265625, 193.798828125,
    215.33203125, 236.865234375, 258.3984375, 279.931640625, 301.46484375, 322.998046875, 344.53125, 366.064453125,
    387.59765625, 409.130859375, 430.6640625, 452.197265625, 495.263671875, 516.796875, 538.330078125, 581.396484375,
    624.462890625, 645.99609375, 689.0625, 732.12890625, 775.1953125, 839.794921875, 882.861328125, 925.927734375,
    990.52734375, 1055.126953125, 1098.193359375, 1184.326171875, 1248.92578125, 1313.525390625, 1399.658203125,
    1485.791015625, 1571.923828125, 1658.056640625, 1765.72265625, 1873.388671875, 1981.0546875, 2088.720703125,
    2217.919921875, 2347.119140625, 2497.8515625, 2627.05078125, 2799.31640625, 2950.048828125, 3143.84765625,
    3316.11328125, 3509.912109375, 3725.244140625, 3940.576171875, 4177.44140625, 4435.83984375, 4694.23828125,
    4974.169921875, 5275.634765625, 5577.099609375, 5921.630859375, 6266.162109375, 6653.759765625, 7041.357421875,
    7450.48828125, 7902.685546875, 8376.416015625, 8871.6796875, 9388.4765625, 9948.33984375, 10551.26953125,
    11175.732421875, 11843.26171875, 12553.857421875, 13285.986328125, 14082.71484375, 14922.509765625, 15805.37109375,
]
# Round to whole numbers so the band comparisons in process() work on ints.
BIN_FREQS = np.array(BIN_FREQS).round().astype(int)
def to_uint8_list(arr):
    """Return *arr* scaled by 255 and cast to ``uint8`` as a (nested) list.

    The cast truncates the fractional part, so ``0.5`` becomes ``127``.

    Args:
        arr: Numpy array; callers in this file pass values no larger than 1.0.

    Returns:
        A Python list (nested to match the array's dimensions) of ints.
    """
    return (arr * 255).astype(np.uint8).tolist()
def apply_to_dict(d, func):
    """Recursively apply *func* to every leaf value of a nested dictionary.

    The dictionary is modified in place: each non-dict value is replaced by
    ``func(value)``.  Nested dictionaries are descended into and are never
    themselves passed to *func*.

    Args:
        d: Dictionary to transform (mutated in place).
        func: Callable taking one leaf value and returning its replacement.

    Returns:
        None.
    """
    for key, value in d.items():
        if isinstance(value, dict):
            apply_to_dict(value, func)
        else:
            # Only leaves are transformed; re-assigning an existing key is
            # safe while iterating because the dict's size does not change.
            d[key] = func(value)
# Build a dict with two lists, "segments" and "labels", from the entries of
# input_data.segments.
# NOTE(review): this definition looks incomplete as it stands -- the for loop
# below has no body and no statement follows the "end time" comment, so both
# lists would be returned empty (and the file does not parse).
def convert_segments(input_data):
segments_output = []
labels_output = []
# Extracting segments and appending to the respective lists
for segment in input_data.segments:
# NOTE(review): loop body missing; presumably one value per segment is
# appended to segments_output and labels_output -- TODO confirm the
# attribute names against the segment type.
# Appending the end time of the last segment
# NOTE(review): the append described by the comment above is not present.
return {"segments": segments_output, "labels": labels_output}
# Turn spectrograms into per-band energy curves ("wav") and smoothed,
# cumulative curves ("nav"), bundle them with metadata taken from `struct`,
# and return the name of the output file `dissector.{name}.json`.
#
# specs  -- array indexed as specs[:, :, bin]; axis 1 is divided by FPS to
#           get the duration, and axis 0 is iterated once per `inst`.
#           Presumably (instrument, frame, frequency bin) -- TODO confirm.
# struct -- object providing at least `.downbeats` and whatever
#           convert_segments() reads (`.segments`).
# name   -- identifier stored under 'id' and used in the output filename.
#
# NOTE(review): several lines of this function appear to be missing (closing
# brackets, the binding of `inst`, the value for 'beats', the body of the
# `with` block).  The notes below mark each spot; the code is left untouched.
def process(specs, struct, name):
# Indices of the bins that fall below LOW, above HIGH, and in between.
i_low = np.flatnonzero(BIN_FREQS < LOW)
i_high = np.flatnonzero(BIN_FREQS > HIGH)
i_mid = np.flatnonzero((LOW <= BIN_FREQS) & (BIN_FREQS <= HIGH))
# Compute the max energy value for each frequency band considering all instruments.
max_low = specs[:, :, i_low].max()
max_mid = specs[:, :, i_mid].max()
max_high = specs[:, :, i_high].max()
# Average over the bins of each band, giving one curve per entry of axis 0.
# NOTE(review): the closing bracket of this list comprehension is missing.
wavs_low, wavs_mid, wavs_high = [
specs[:, :, indices].mean(axis=-1)
# spec[:, indices].mean(axis=1)
for indices in [i_low, i_mid, i_high]
# Scale each band by its own global maximum (in place).
wavs_low /= max_low
wavs_mid /= max_mid
wavs_high /= max_high
# Sanity checks only: assert statements are skipped when Python runs with -O.
assert wavs_low.max() <= 1.0
assert wavs_mid.max() <= 1.0
assert wavs_high.max() <= 1.0
# Smooth every curve with a median filter whose window is FPS samples long.
navs_low = np.array([median_filter(wav, size=FPS) for wav in wavs_low])
navs_mid = np.array([median_filter(wav, size=FPS) for wav in wavs_mid])
navs_high = np.array([median_filter(wav, size=FPS) for wav in wavs_high])
# Stack the bands cumulatively: mid becomes low+mid, high becomes
# low+mid+high.  The first assignment is a no-op kept for symmetry.
navs_low = navs_low
navs_mid = navs_low + navs_mid
navs_high = navs_mid + navs_high
# Rescale all three stacked curves by their common maximum.
max_nav = np.max([navs_low.max(), navs_mid.max(), navs_high.max()])
navs_low /= max_nav
navs_mid /= max_nav
navs_high /= max_nav
assert navs_high.max() <= 1.0
# Output container; per-`inst` entries are added in the loop below.
# NOTE(review): the closing brace of this dict literal is missing.
data = {
'nav': {},
'wav': {},
# Walk the six curve arrays in lockstep, one row of each per iteration.
# NOTE(review): `inst` is used as a key below but is not bound anywhere in
# the visible code, and the zip(...) call / for header is never closed.
for (
eg_low, eg_mid, eg_high,
nav_low, nav_mid, nav_high,
) in zip(
wavs_low, wavs_mid, wavs_high,
navs_low, navs_mid, navs_high,
# NOTE(review): the closing braces of the two dict literals below are missing.
data['wav'][inst] = {
'low': eg_low,
'mid': eg_mid,
'high': eg_high,
data['nav'][inst] = {
'low': nav_low,
'mid': nav_mid,
'high': nav_high,
# Replace every array stored so far with a list of uint8 values
# (see to_uint8_list); done before the non-array fields are added.
apply_to_dict(data, to_uint8_list)
# Number of entries along axis 1 divided by the frame rate.
data['duration'] = specs.shape[1] / FPS
# Score entries are all written as 0 here.
# NOTE(review): the "segment" dict is not closed and the "beat" / "downbeat"
# dicts have no visible contents.
data['scores'] = {
"segment": {
"Ref-to-est deviation":0,
"Est-to-ref deviation":0,
"Pairwise Precision":0,
"Pairwise Recall":0,
"Pairwise F-measure":0,
"Rand Index":0,
"Adjusted Rand Index":0,
"Mutual Information":0,
"Adjusted Mutual Information":0,
"Normalized Mutual Information":0,
"NCE Over":0,
"NCE Under":0,
"NCE F-measure":0,
"V Precision":0,
"V Recall":0,
"beat": {
"downbeat": {
data['id'] = name
# NOTE(review): 'beats' has no value, which is a syntax error as written;
# presumably it should mirror 'downbeats' using an attribute of `struct` --
# TODO confirm.
data['truths'] = {'beats':, 'downbeats': struct.downbeats, **convert_segments(struct)}
# 'inferences' refers to the very same dict object as 'truths' (no copy).
data['inferences'] = data['truths']
filename = f'dissector.{name}.json'
# NOTE(review): the body of this `with` block is missing; presumably `data`
# is serialised to the file here -- TODO confirm.
with open(filename, 'w') as file:
return filename
def generate_dissector_data(name, result):
    """Load the stored spectrograms for *name* and hand them to process().

    Args:
        name: Track identifier.  The spectrograms are read from
            ``./spec/{name}.npy``, resolved against the current working
            directory.
        result: Structure object forwarded unchanged to process() as its
            ``struct`` argument.

    Returns:
        The value returned by process() (the output filename).
    """
    # The previously computed ``struct_path`` was never read, so it is dropped.
    spec_path = Path(f'./spec/{name}.npy').resolve().as_posix()
    specs = np.load(spec_path)
    return process(specs, result, name)