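# NOTE: the three lines that stood here ("Spaces:", "Runtime error", "Runtime error")
# were page-scrape residue from the hosting site, not part of this module.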
import pandas as pd | |
import numpy as np | |
import os | |
from src.cocktails.utilities.cocktail_utilities import get_bunch_of_rep_keys | |
from src.cocktails.utilities.other_scrubbing_utilities import print_recipe | |
from src.cocktails.config import COCKTAILS_CSV_DATA | |
from src.music.config import CHECKPOINTS_PATH, EXPERIMENT_PATH | |
import matplotlib.pyplot as plt | |
from sklearn.cluster import KMeans | |
from sklearn.mixture import GaussianMixture | |
from sklearn.neighbors import NearestNeighbors | |
import pickle | |
import random | |
# --- Output / checkpoint locations -------------------------------------------
experiment_path = EXPERIMENT_PATH + '/cocktails/representation_analysis/affective_mapping/'
min_max_path = CHECKPOINTS_PATH + "/cocktail_representation/minmax/"
cluster_model_path = CHECKPOINTS_PATH + "/music2cocktails/affects2affect_cluster/cluster_model.pickle"

# --- Affective space layout --------------------------------------------------
# (low, high) bounds of the valence, arousal and dominance axes.
affective_space_dimensions = ((-1, 1), (-1, 1), (-1, 1))
# Number of bins along each axis for the 'handcoded' grid clustering.
n_splits = (3, 3, 2)
# Per-axis scaling applied to coordinates before fitting / predicting clusters.
# dimensions_weights = [1, 1, 0.5]
dimensions_weights = [1, 1, 1]
# Number of cells of the hand-coded grid (overridden below for learned clusterings).
total_n_clusters = np.prod(n_splits)
# Bin edges per axis; the upper bound is padded so that it is included in the range.
affective_boundaries = [
    np.arange(asd[0], asd[1] + 1e-6, (asd[1] - asd[0]) / n_split)
    for asd, n_split in zip(affective_space_dimensions, n_splits)
]
for af in affective_boundaries:
    # Push the last edge slightly further so a coordinate equal to the upper
    # bound still falls strictly below it.
    af[-1] += 1e-6

# --- Representation keys -----------------------------------------------------
all_keys = get_bunch_of_rep_keys()['custom']
original_affective_keys = get_bunch_of_rep_keys()['affective']
# Keep only the second space-separated token of each key as its short name.
affective_keys = [a.split(' ')[1] for a in original_affective_keys]

# --- Plot colors (seeded so they are reproducible across runs) ---------------
random.seed(0)
cluster_colors = ['#%06X' % random.randint(0, 0xFFFFFF) for _ in range(total_n_clusters)]

# --- Clustering strategy -----------------------------------------------------
# get_clusters() handles 'k_means', 'gmm' and 'handcoded'.
clustering_method = 'k_means'
if clustering_method != 'handcoded':
    total_n_clusters = 10

# --- Stored normalization bounds of the three affective dimensions -----------
# Read once at import time; cocktail2affect() uses them when save is False.
min_arousal = np.loadtxt(min_max_path + 'min_arousal.txt')
max_arousal = np.loadtxt(min_max_path + 'max_arousal.txt')
min_val = np.loadtxt(min_max_path + 'min_valence.txt')
max_val = np.loadtxt(min_max_path + 'max_valence.txt')
min_dom = np.loadtxt(min_max_path + 'min_dominance.txt')
max_dom = np.loadtxt(min_max_path + 'max_dominance.txt')
def get_cocktail_reps(path, save=False):
    """Load the affective columns of a cocktail CSV and normalize them.

    Args:
        path: location of the CSV file, handed to ``pd.read_csv``.
        save: forwarded to ``normalize_cocktail_reps_affective``; when true the
            normalized array is also written to a text file under
            ``experiment_path``.

    Returns:
        The array produced by ``normalize_cocktail_reps_affective``: one row
        per CSV row, one column per entry of ``original_affective_keys``.
    """
    table = pd.read_csv(path)
    columns = [table[key] for key in original_affective_keys]
    raw_reps = np.array(columns).transpose()
    _, dim_rep = raw_reps.shape
    normalized = normalize_cocktail_reps_affective(raw_reps, save=save)
    if not save:
        return normalized
    out_file = experiment_path + f'cocktail_reps_for_affective_mapping_-1_1_norm_sigmoid_rescaling_{dim_rep}_keys.txt'
    np.savetxt(out_file, normalized)
    return normalized
def sigmoid(x, shift, beta):
    """Shifted, scaled logistic function rescaled to the open interval (-1, 1).

    Args:
        x: scalar or numpy array of inputs.
        shift: value added to ``x`` before scaling.
        beta: slope applied to ``x + shift``.

    Returns:
        ``2 * (logistic((x + shift) * beta) - 0.5)``, same shape as ``x``.
    """
    logistic = 1 / (1 + np.exp(-(x + shift) * beta))
    return (logistic - 0.5) * 2
def normalize_cocktail_reps_affective(cocktail_reps, save=False):
    """Rescale affective cocktail representations and squash them column by column.

    Each column is first mapped linearly to [-1, 1] using per-column min/max
    bounds, then passed through ``sigmoid`` with column-specific parameters.

    Args:
        cocktail_reps: 2-D array with at least eight columns (columns 0..7 are
            transformed).
        save: when true, the bounds are computed from ``cocktail_reps`` and
            written under ``min_max_path``; otherwise they are read back from
            those files.

    Returns:
        A new array with the normalized representations.
    """
    min_file = min_max_path + 'min_cocktail_reps_affective.txt'
    max_file = min_max_path + 'max_cocktail_reps_affective.txt'
    if not save:
        lower = np.loadtxt(min_file)
        upper = np.loadtxt(max_file)
    else:
        lower = cocktail_reps.min(axis=0)
        upper = cocktail_reps.max(axis=0)
        np.savetxt(min_file, lower)
        np.savetxt(max_file, upper)

    scaled = ((cocktail_reps - lower) / (upper - lower) - 0.5) * 2

    # (shift, beta) of the sigmoid applied to column 0, 1, ..., 7.
    sigmoid_params = (
        (0.05, 4),
        (0.3, 5),
        (0.15, 3),
        (0.9, 20),
        (0, 4),
        (0.2, 3),
        (0.5, 5),
        (0.2, 6),
    )
    for column, (shift, beta) in enumerate(sigmoid_params):
        scaled[:, column] = sigmoid(scaled[:, column], shift=shift, beta=beta)
    return scaled
def plot(cocktail_reps):
    """Save pairwise scatter plots and per-dimension histograms of the representations.

    For every pair of columns a scatter plot is written under
    ``experiment_path + 'scatters/'``; for every column a histogram is written
    under ``experiment_path + 'hists/'``. Axis labels and file names come from
    ``affective_keys``.

    Args:
        cocktail_reps: 2-D array, one column per affective key.
    """
    n_dims = cocktail_reps.shape[1]
    for first in range(n_dims):
        # Scatter of this dimension against every later one.
        for second in range(first + 1, n_dims):
            plt.figure()
            plt.scatter(cocktail_reps[:, first], cocktail_reps[:, second], s=150, alpha=0.5)
            plt.xlabel(affective_keys[first])
            plt.ylabel(affective_keys[second])
            scatter_file = experiment_path + f'scatters/{affective_keys[first]}_vs_{affective_keys[second]}.png'
            plt.savefig(scatter_file, dpi=300)
            plt.close('all')

        # Histogram of this dimension alone.
        plt.figure()
        plt.hist(cocktail_reps[:, first])
        plt.xlabel(affective_keys[first])
        hist_file = experiment_path + f'hists/{affective_keys[first]}.png'
        plt.savefig(hist_file, dpi=300)
        plt.close('all')
def get_clusters(affective_coordinates, save=False):
    """Assign every point of the affective space to a cluster.

    The strategy is selected by the module-level ``clustering_method``:

    * ``'k_means'`` / ``'gmm'``: a scikit-learn model with ``total_n_clusters``
      clusters is fitted on the coordinates scaled by ``dimensions_weights``.
    * ``'handcoded'``: points are binned on the regular grid described by
      ``affective_boundaries`` / ``n_splits``.

    Args:
        affective_coordinates: 2-D array with one row per point and three
            columns (the hand-coded branch indexes columns 0..2).
        save: only used by the model-based strategies; when true, the fitted
            model, its centers, ``len(cluster_centers)`` and
            ``dimensions_weights`` are pickled to ``cluster_model_path``.
            For ``'gmm'`` the stored centers are an empty list, so the stored
            ``nb_clusters`` is 0.

    Returns:
        Tuple ``(cluster_ids, cluster_centers, find_cluster)`` where
        ``find_cluster`` maps a 1-D or 2-D array of affective coordinates to
        cluster ids.

    Raises:
        NotImplementedError: if ``clustering_method`` is not one of the
            strategies listed above.
    """
    if clustering_method in ('k_means', 'gmm'):
        if clustering_method == 'k_means':
            model = KMeans(n_clusters=total_n_clusters)
        else:
            model = GaussianMixture(n_components=total_n_clusters, covariance_type="full")
        model.fit(affective_coordinates * np.array(dimensions_weights))

        def find_cluster(aff_coord):
            # Accept a single point as well as a batch of points.
            if aff_coord.ndim == 1:
                aff_coord = aff_coord.reshape(1, -1)
            return model.predict(aff_coord * np.array(dimensions_weights))

        # Only KMeans exposes cluster centers here.
        cluster_centers = model.cluster_centers_ if clustering_method == 'k_means' else []
        if save:
            to_save = dict(cluster_model=model,
                           cluster_centers=cluster_centers,
                           nb_clusters=len(cluster_centers),
                           dimensions_weights=dimensions_weights)
            with open(cluster_model_path, 'wb') as f:
                pickle.dump(to_save, f)
    elif clustering_method == 'handcoded':
        def find_cluster(aff_coord):
            if aff_coord.ndim == 1:
                aff_coord = aff_coord.reshape(1, -1)
            cluster_coordinates = []
            for i in range(aff_coord.shape[0]):
                # Bin index along each axis = index of the last boundary that
                # is <= the coordinate.
                cluster_coordinates.append(
                    [np.argwhere(affective_boundaries[j] <= aff_coord[i, j]).flatten()[-1] for j in range(3)]
                )
            cluster_coordinates = np.array(cluster_coordinates)
            # Flatten the 3-D bin coordinates into one id (row-major order).
            cluster_ids = (cluster_coordinates[:, 0] * np.prod(n_splits[1:])
                           + cluster_coordinates[:, 1] * n_splits[-1]
                           + cluster_coordinates[:, 2])
            return cluster_ids

        # Candidate bin centers along each axis: start at low + 1 / n and step
        # by the bin width. NOTE(review): low + 1 / n is the middle of the
        # first bin only when the axis spans a width of 2, as it does with the
        # current affective_space_dimensions.
        axis_centers = [
            np.arange(low + 1 / n_bins, high, (high - low) / n_bins)
            for (low, high), n_bins in zip(affective_space_dimensions, n_splits)
        ]
        cluster_centers = np.array([
            [axis_centers[0][i], axis_centers[1][j], axis_centers[2][k]]
            for i in range(n_splits[0])
            for j in range(n_splits[1])
            for k in range(n_splits[2])
        ])
    else:
        # NotImplemented is a comparison sentinel, not an exception; raising
        # it would itself fail with a TypeError.
        raise NotImplementedError(f'Unknown clustering method: {clustering_method!r}')

    cluster_ids = find_cluster(affective_coordinates)
    return cluster_ids, cluster_centers, find_cluster
def cocktail2affect(cocktail_reps, save=False):
    """Project cocktail representations onto valence, arousal and dominance.

    Each affective dimension is a weighted sum of the eight representation
    columns, rescaled linearly with (min, max) bounds and squashed with
    ``sigmoid``.

    Args:
        cocktail_reps: 1-D array (a single cocktail) or 2-D array with one row
            per cocktail and one column per entry of ``affective_keys``.
        save: when true, the bounds of each weighted sum are computed from the
            given data and written under ``min_max_path``; otherwise the
            module-level bounds loaded at import time are used.

    Returns:
        Tuple ``(affective_coordinates, all_weights)``: an array with one row
        per cocktail and three columns (valence, arousal, dominance), and the
        list of the three weight vectors in the same order.
    """
    if cocktail_reps.ndim == 1:
        cocktail_reps = cocktail_reps.reshape(1, -1)
    # The weight vectors below are positional, so the key order must match.
    assert affective_keys == ['booze', 'sweet', 'sour', 'fizzy', 'complex', 'bitter', 'spicy', 'colorful']

    # One entry per affective dimension:
    # (weights, min file, max file, stored (min, max), sigmoid shift, sigmoid beta)
    dimension_specs = (
        # valence: + sweet - bitter - booze + colorful
        (np.array([-1, 1, 0, 0, 0, -1, 0, 1]),
         'min_valence.txt', 'max_valence.txt', (min_val, max_val), 0.1, 3.5),
        # arousal: + booze + sour + fizzy + complex + spicy
        # (earlier variant: np.array([0, -1, 1, 1, 1, 1, 1, 0]))
        (np.array([0.7, 0, 1.5, 1.5, 0.6, 0, 0.6, 0]),
         'min_arousal.txt', 'max_arousal.txt', (min_arousal, max_arousal), 0.3, 4),
        # dominance: + booze + fizzy - complex - bitter - sweet
        (np.array([1.5, -0.8, 0, 0.7, -1, -1.5, 0, 0]),
         'min_dominance.txt', 'max_dominance.txt', (min_dom, max_dom), -0.05, 5),
    )

    columns = []
    all_weights = []
    for weights, min_file, max_file, stored_bounds, shift, beta in dimension_specs:
        projection = (cocktail_reps * weights).sum(axis=1)
        if save:
            low = projection.min()
            high = projection.max()
            np.savetxt(min_max_path + min_file, np.array([low]))
            np.savetxt(min_max_path + max_file, np.array([high]))
        else:
            low, high = stored_bounds
        projection = 2 * ((projection - low) / (high - low) - 0.5)  # linear map using the bounds
        projection = sigmoid(projection, shift=shift, beta=beta)
        columns.append(projection.reshape(-1, 1))
        all_weights.append(weights.copy())

    affective_coordinates = np.concatenate(columns, axis=1)
    return affective_coordinates, all_weights
def save_reps(path, affective_cluster_ids):
    """Write the 'custom' cocktail representations to disk, globally and per cluster.

    All files go to ``experiment_path + 'clustered_representations/'``:
    per-column min and max, the representations rescaled column-wise to
    [-1, 1], the cluster ids, and one file per cluster with the rescaled rows
    belonging to it.

    Args:
        path: location of the cocktail CSV, handed to ``pd.read_csv``.
        affective_cluster_ids: cluster id of each CSV row.
    """
    table = pd.read_csv(path)
    custom_keys = get_bunch_of_rep_keys()['custom']
    raw_reps = np.array([table[key] for key in custom_keys]).transpose()

    out_dir = experiment_path + 'clustered_representations/'
    dim = raw_reps.shape[1]
    col_min = raw_reps.min(axis=0)
    col_max = raw_reps.max(axis=0)
    np.savetxt(out_dir + f'min_cocktail_reps_custom_keys_dim{dim}.txt', col_min)
    np.savetxt(out_dir + f'max_cocktail_reps_custom_keys_dim{dim}.txt', col_max)

    # Column-wise rescaling to [-1, 1].
    normalized = ((raw_reps - col_min) / (col_max - col_min) - 0.5) * 2
    np.savetxt(out_dir + f'all_cocktail_reps_norm-1_1_custom_keys_dim{dim}.txt', normalized)
    np.savetxt(out_dir + 'affective_cluster_ids.txt', affective_cluster_ids)

    for cluster_id in sorted(set(affective_cluster_ids)):
        member_rows = np.flatnonzero(affective_cluster_ids == cluster_id)
        np.savetxt(out_dir + f'rep_cluster{cluster_id}_norm-1_1_custom_keys_dim{dim}.txt',
                   normalized[member_rows, :])
def study_affects(affective_coordinates, affective_cluster_ids):
    """Save diagnostic figures of the affective space and its clusters.

    Written under ``experiment_path``: the distribution of cluster ids, a 3-D
    scatter of all points colored by cluster, 2-D scatters for every pair of
    affective dimensions, and one histogram per dimension.

    Args:
        affective_coordinates: 2-D array whose columns 0..2 are valence,
            arousal and dominance.
        affective_cluster_ids: cluster id of each row; used to index
            ``cluster_colors``.
    """
    # Distribution of the cluster ids.
    plt.figure()
    plt.hist(affective_cluster_ids, bins=total_n_clusters)
    plt.xlabel('Affective cluster ids')
    plt.xticks(np.arange(total_n_clusters))
    plt.savefig(experiment_path + 'affective_cluster_distrib.png')
    plt.close(plt.gcf())

    # Row indexes of each cluster, computed once and reused by every scatter.
    members = [
        (cluster_id, np.argwhere(affective_cluster_ids == cluster_id).flatten())
        for cluster_id in sorted(set(affective_cluster_ids))
    ]

    # 3-D view of the whole affective space.
    figure = plt.figure()
    axes = figure.add_subplot(projection='3d')
    axes.set_xlim([-1, 1])
    axes.set_ylim([-1, 1])
    axes.set_zlim([-1, 1])
    for cluster_id, rows in members:
        axes.scatter(affective_coordinates[rows, 0],
                     affective_coordinates[rows, 1],
                     affective_coordinates[rows, 2],
                     c=cluster_colors[cluster_id], s=150)
    axes.set_xlabel('Valence')
    axes.set_ylabel('Arousal')
    axes.set_zlabel('Dominance')
    plt.savefig(experiment_path + 'scatters_affect/affective_mapping.png')
    plt.close(plt.gcf())

    affects = ['Valence', 'Arousal', 'Dominance']
    for first in range(3):
        # 2-D projections on this dimension and every later one.
        for second in range(first + 1, 3):
            figure = plt.figure()
            axes = figure.add_subplot()
            for cluster_id, rows in members:
                axes.scatter(affective_coordinates[rows, first],
                             affective_coordinates[rows, second],
                             alpha=0.5, c=cluster_colors[cluster_id], s=150)
            axes.set_xlabel(affects[first])
            axes.set_ylabel(affects[second])
            plt.savefig(experiment_path + f'scatters_affect/scatter_{affects[first]}_vs_{affects[second]}.png')
            plt.close(plt.gcf())

        # Marginal distribution of this dimension.
        plt.figure()
        plt.hist(affective_coordinates[:, first])
        plt.xlabel(affects[first])
        plt.savefig(experiment_path + f'hists_affect/hist_{affects[first]}.png')
        plt.close(plt.gcf())
    plt.close('all')
def sample_clusters(path, cocktail_reps, all_weights, affective_cluster_ids, affective_cluster_centers, affective_coordinates, n_samples=4):
    """Print and save a few example cocktails for every affective cluster.

    For each cluster a summary (size and center) is printed, up to
    ``n_samples`` member cocktails are drawn without replacement, and their
    recipe, url, representation and affect explanation are printed. The
    summary followed by the samples is written to
    ``experiment_path + 'clusters/cluster_<id>'``.

    Args:
        path: location of the cocktail CSV; must provide the ``names``,
            ``urls`` and ``ingredients_str`` columns plus the affective keys.
        cocktail_reps: normalized affective representations, one row per CSV row.
        all_weights: weight vectors as returned by ``cocktail2affect``.
        affective_cluster_ids: cluster id of each CSV row.
        affective_cluster_centers: indexable by cluster id, each entry holding
            at least three coordinates (valence, arousal, dominance).
        affective_coordinates: affective coordinates of each CSV row.
        n_samples: maximum number of cocktails sampled per cluster.

    Raises:
        AssertionError: if ``cocktail_reps`` disagrees with the representation
            recomputed from ``path`` for a sampled cocktail.
    """
    cocktail_data = pd.read_csv(path)
    # Recomputed from the CSV to check that cocktail_reps is aligned with it.
    these_cocktail_reps = normalize_cocktail_reps_affective(
        np.array([cocktail_data[k] for k in original_affective_keys]).transpose()
    )
    names = cocktail_data['names']
    urls = cocktail_data['urls']
    ingr_str = cocktail_data['ingredients_str']

    for cluster_id in sorted(set(affective_cluster_ids)):
        indexes = np.argwhere(affective_cluster_ids == cluster_id).flatten()
        print('\n\n\n---------\n----------\n-----------\n')
        center = affective_cluster_centers[cluster_id]
        cluster_str = (
            f'Affective cluster #{cluster_id}'
            f'\n\tSize: {len(indexes)}'
            f'\n\tCenter: '
            f'\n\t\tVal: {center[0]:.2f}, '
            f'\n\t\tArousal: {center[1]:.2f}, '
            f'\n\t\tDominance: {center[2]:.2f}'
        )
        print(cluster_str)

        sampled_idx = np.random.choice(indexes, size=min(len(indexes), n_samples), replace=False)
        cocktail_parts = []
        for i in sampled_idx:
            # Absolute differences: a signed sum would let large negative
            # deviations (or deviations that cancel out) pass unnoticed.
            mismatch = np.abs(cocktail_reps[i] - these_cocktail_reps[i]).sum()
            assert mismatch < 1e-9, f'representation of cocktail {i} does not match the data at {path}'
            cocktail_parts.append(f'\n\n-------------')
            cocktail_parts.append(print_recipe(ingr_str[i], name=names[i], to_print=False))
            cocktail_parts.append(f'\nUrl: {urls[i]}')
            cocktail_parts.append(
                '\n\nRepresentation: '
                + ', '.join([f'{af}: {cr:.2f}' for af, cr in zip(affective_keys, cocktail_reps[i])])
                + '\n'
            )
            cocktail_parts.append('\n' + generate_explanation(cocktail_reps[i], all_weights, affective_coordinates[i]))
        cocktail_str = ''.join(cocktail_parts)
        print(cocktail_str)

        cluster_str += '\n' + cocktail_str
        # Written next to the other experiment outputs rather than to a
        # machine-specific absolute path.
        with open(experiment_path + f'clusters/cluster_{cluster_id}', 'w') as f:
            f.write(cluster_str)
def explanation_per_dimension(i, cocktail_rep, all_weights, aff_coord):
    """Explain how each representation feature contributes to one affective dimension.

    Args:
        i: index of the affective dimension (0: valence, 1: arousal,
            2: dominance).
        cocktail_rep: 1-D array with the cocktail representation.
        all_weights: sequence of weight vectors; ``all_weights[i]`` is used.
        aff_coord: affective coordinates of the cocktail; ``aff_coord[i]`` is
            shown in the header.

    Returns:
        A multi-line string: a header followed by one line per feature with a
        non-zero contribution, sorted by decreasing relative strength. When
        every contribution is zero only the header is returned.
    """
    names = ['valence', 'arousal', 'dominance']
    weights = all_weights[i]
    explanation_str = f'\n{names[i].capitalize()} explanation ({aff_coord[i]:.2f}):'

    strengths = np.abs(weights * cocktail_rep)
    total = strengths.sum()
    if total == 0:
        # Nothing contributes: normalizing would divide by zero and the
        # resulting NaN percentages cannot be converted to int.
        return explanation_str
    strengths = strengths / total

    for ind in np.flip(np.argsort(strengths)):
        if strengths[ind] == 0:
            continue
        keyword = 'high' if cocktail_rep[ind] > 0 else 'low'
        # Same sign for weight and feature means a positive contribution.
        # NOTE(review): the two branches emit 'higher' and 'low' respectively;
        # the wording is kept as is because it is program output — confirm
        # whether 'lower' was intended.
        if np.sign(weights[ind]) == np.sign(cocktail_rep[ind]):
            direction = 'higher'
        else:
            direction = 'low'
        explanation_str += f'\n\t{int(strengths[ind]*100)}%: {direction} {names[i]} because {keyword} {affective_keys[ind]}'
    return explanation_str
def generate_explanation(cocktail_rep, all_weights, aff_coord):
    """Concatenate the per-dimension explanations for the three affective dimensions.

    Args:
        cocktail_rep: representation of a single cocktail.
        all_weights: weight vectors, one per affective dimension.
        aff_coord: affective coordinates of the cocktail.

    Returns:
        The strings returned by ``explanation_per_dimension`` for dimensions
        0, 1 and 2, joined in that order.
    """
    return ''.join(
        explanation_per_dimension(dimension, cocktail_rep, all_weights, aff_coord)
        for dimension in range(3)
    )
def cocktails2affect_clusters(cocktail_rep):
    """Map cocktail representations to affective cluster ids.

    NOTE(review): for the 'k_means' and 'gmm' methods ``get_clusters`` fits a
    new model on the points it receives, so the ids returned here come from a
    clustering of ``cocktail_rep`` itself — confirm this is intended.

    Args:
        cocktail_rep: 1-D array (a single cocktail) or 2-D array with one row
            per cocktail.

    Returns:
        The cluster ids returned by ``get_clusters`` for the affective
        coordinates of the given cocktails.
    """
    batch = cocktail_rep.reshape(1, -1) if cocktail_rep.ndim == 1 else cocktail_rep
    coordinates = cocktail2affect(batch)[0]
    cluster_ids = get_clusters(coordinates)[0]
    return cluster_ids
def setup_affective_space(path, save=False):
    """Build every object needed to work in the affective space of a cocktail dataset.

    Args:
        path: location of the cocktail CSV; must provide the ``names``,
            ``ingredients_str`` and ``urls`` columns plus the affective keys.
        save: forwarded to ``get_clusters`` (whether to pickle the fitted
            clustering model).

    Returns:
        A dict bundling the dataset's affective coordinates, cluster ids and
        centers, the affect weights, the normalized representations, a fitted
        1-nearest-neighbor model on the affective coordinates, the dataset's
        names / urls / recipes, and helper functions to map representations to
        affects and clusters.
    """
    table = pd.read_csv(path)
    names = table['names']
    recipes = table['ingredients_str']
    urls = table['urls']

    reps = get_cocktail_reps(path)
    affective_coordinates, all_weights = cocktail2affect(reps)
    affective_cluster_ids, affective_cluster_centers, find_cluster = get_clusters(affective_coordinates, save=save)

    # Nearest-neighbor index over the dataset's affective coordinates.
    nn_model = NearestNeighbors(n_neighbors=1)
    nn_model.fit(affective_coordinates)

    def cocktail2affect_cluster(cocktail_rep):
        # Reuses the clustering fitted above instead of fitting a new one.
        coordinates, _ = cocktail2affect(cocktail_rep)
        return find_cluster(coordinates)

    return {
        'affective_coordinates': affective_coordinates,  # coordinates of cocktails in affective space
        'affective_cluster_ids': affective_cluster_ids,  # cluster id of each cocktail
        'affective_cluster_centers': affective_cluster_centers,  # cluster centers in affective space
        'affective_weights': all_weights,  # weights mapping representations to valence, arousal, dominance
        'original_affective_keys': original_affective_keys,
        'cocktail_reps': reps,  # normalized cocktail representations of the dataset
        'find_cluster': find_cluster,  # affective coordinates -> cluster id
        'nn_model': nn_model,  # nearest neighbor lookup in affective space
        'names': names,  # names of the cocktails in the dataset
        'urls': urls,  # urls from the dataset
        'recipes': recipes,  # recipes of the dataset
        'cocktail2affect': cocktail2affect,  # representations -> affective coordinates
        'cocktails2affect_clusters': cocktails2affect_clusters,
        'cocktail2affect_cluster': cocktail2affect_cluster,
    }
if __name__ == '__main__':
    # Full pipeline on the cocktail dataset, regenerating every stored bound.
    # Step 1: normalized representations (writes the per-column min/max).
    reps = get_cocktail_reps(COCKTAILS_CSV_DATA, save=True)
    # plot(reps)

    # Step 2: affective coordinates (writes the valence/arousal/dominance bounds).
    affective_coordinates, all_weights = cocktail2affect(reps, save=True)

    # Step 3: clustering of the affective space (model not pickled here).
    affective_cluster_ids, affective_cluster_centers, find_cluster = get_clusters(affective_coordinates)

    # Step 4: dumps and diagnostics.
    save_reps(COCKTAILS_CSV_DATA, affective_cluster_ids)
    study_affects(affective_coordinates, affective_cluster_ids)
    sample_clusters(COCKTAILS_CSV_DATA, reps, all_weights, affective_cluster_ids,
                    affective_cluster_centers, affective_coordinates)

    # Step 5: rebuild the affective space, this time pickling the cluster model.
    setup_affective_space(COCKTAILS_CSV_DATA, save=True)