File size: 3,621 Bytes
f636680
 
 
 
 
 
 
e7b83d9
f636680
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e7b83d9
f636680
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113

import os
import spotipy
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

from get_scaler import get_scaler
from dotenv import load_dotenv
from spotipy.oauth2 import SpotifyClientCredentials
from collections import defaultdict
from sklearn.metrics import euclidean_distances
from scipy.spatial.distance import cdist

# Columns that make up the numeric vector taken from each song row.
number_cols = [
    'valence', 'year', 'acousticness', 'danceability', 'duration_ms',
    'energy', 'explicit', 'instrumentalness', 'key', 'liveness',
    'loudness', 'mode', 'popularity', 'speechiness', 'tempo',
]

# Load environment variables from the .env file into the process environment.
load_dotenv()

# Spotify API credentials; each is None when its variable is not set.
client_id = os.environ.get('SPOTIFY_CLIENT_ID')
client_secret = os.environ.get('SPOTIFY_CLIENT_SECRET')

# Module-wide Spotify client, authenticated with the credentials above.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    ),
)

def find_song(name):
    """Fetch metadata and audio features for one track from the Spotify API.

    Searches Spotify for ``name`` and uses only the top track result.

    Args:
        name: Free-text search query (typically the song title).

    Returns:
        A one-row ``pandas.DataFrame`` holding ``name`` (the query string as
        passed in, not Spotify's title), ``year``, ``artist``, ``explicit``,
        ``duration_ms``, ``popularity`` and every key of the track's
        audio-features payload; or ``None`` when the search has no results
        or Spotify provides no audio features for the track.
    """
    results = sp.search(q=name, limit=1, type='track')
    items = results["tracks"]["items"]
    if not items:
        return None

    track = items[0]

    # audio_features() returns one entry per requested id, and that entry is
    # None when Spotify has no analysis for the track. Without the features
    # the numeric vector cannot be built, so report the song as unavailable
    # instead of crashing on None.items().
    features = sp.audio_features(track["id"])
    track_features = features[0] if features else None
    if track_features is None:
        return None

    song_data = {
        "name": [name],
        # release_date is "YYYY", "YYYY-MM" or "YYYY-MM-DD"; keep the year.
        "year": [int(track['album']['release_date'].split("-")[0])],
        "artist": [track['artists'][0]['name']],
        "explicit": [int(track['explicit'])],
        "duration_ms": [track['duration_ms']],
        "popularity": [track['popularity']],
    }

    # Wrap every feature in a one-element list so all columns have the same
    # shape (the audio-features 'duration_ms' overrides the one set above).
    for key, value in track_features.items():
        song_data[key] = [value]

    return pd.DataFrame(song_data)


def get_song_data(song, spotify_data):
    """Look up a song locally by exact name, falling back to the Spotify API.

    Args:
        song: Mapping with a 'name' key holding the song title.
        spotify_data: DataFrame with a 'name' column to search.

    Returns:
        The first row of ``spotify_data`` whose 'name' equals ``song['name']``
        (a ``pandas.Series``). When no row matches, the result of
        ``find_song(song['name'])`` is returned instead, which is a one-row
        DataFrame or ``None``.
    """
    title = song['name']
    matches = spotify_data[spotify_data['name'] == title]

    # No local match: hand the lookup over to the online search.
    if len(matches) == 0:
        print("Could not find song in the fitted data. Trying to fetch online now...")
        return find_song(title)

    first_match = matches.iloc[0]
    print("Finding the song in the fitted data.")
    return first_match


def get_mean_vector(song_list, spotify_data):
    """Return the element-wise mean of the songs' numeric feature vectors.

    Each song is resolved through ``get_song_data``; songs that cannot be
    resolved are reported on stdout and skipped.

    Args:
        song_list: Iterable of mappings, each with a 'name' key.
        spotify_data: DataFrame searched by ``get_song_data``; must contain
            the ``number_cols`` columns.

    Returns:
        A 1-D float ``numpy.ndarray`` with one entry per column in
        ``number_cols``. If no song could be resolved the vector is filled
        with NaN.
    """
    song_vectors = []

    for song in song_list:
        song_data = get_song_data(song, spotify_data)
        if song_data is None:
            print(f"Warning: {song['name']} does not exist in Spotify or in database")
            continue
        # get_song_data yields a Series for local hits (values shape (k,))
        # but a one-row DataFrame for API hits (shape (1, k)). Flatten both
        # to 1-D floats so the vectors can be stacked.
        song_vector = np.asarray(song_data[number_cols].values, dtype=float).ravel()
        song_vectors.append(song_vector)

    if not song_vectors:
        # Nothing to average: return an explicit NaN vector of the expected
        # length rather than taking the mean of an empty array.
        print('Warning: none of the given songs could be found; mean vector is undefined')
        return np.full(len(number_cols), np.nan)

    return np.mean(np.vstack(song_vectors), axis=0)


def flatten_dict_list(dict_list):
    """Merge a list of dicts into one dict mapping each key to a list of values.

    Values keep the order in which their dicts appear in ``dict_list``. Keys
    appear in first-seen order. A key that is absent from some dicts simply
    collects fewer values.

    Args:
        dict_list: Iterable of mappings; may be empty.

    Returns:
        A ``defaultdict`` (with no default factory, so looking up a missing
        key raises ``KeyError``) of ``key -> list of values``. Empty when
        ``dict_list`` is empty.
    """
    flattened_dict = defaultdict(list)

    for dictionary in dict_list:
        for key, value in dictionary.items():
            flattened_dict[key].append(value)

    # The list factory is only needed while building; disable it so lookups
    # of unknown keys by callers fail loudly instead of inserting [].
    flattened_dict.default_factory = None
    return flattened_dict


def recommend_songs(song_list, spotify_data, n_songs=10):
    """Recommend the songs closest to the first seed song in feature space.

    The first entry of ``song_list`` is resolved through ``get_song_data``
    and used as the query point. Both the query and ``spotify_data`` are
    scaled with the scaler from ``get_scaler()[1]`` and ranked by cosine
    distance. Rows whose lower-cased name matches any seed name are excluded
    before the ranking is truncated, so up to ``n_songs`` results are
    returned.

    Args:
        song_list: List of mappings, each with a 'name' key. Only the first
            entry positions the query; all entries are excluded from results.
        spotify_data: DataFrame containing the ``number_cols`` columns plus
            'name', 'year' and 'artists'.
        n_songs: Maximum number of recommendations to return.

    Returns:
        A list of dicts with keys 'name' (lower-cased), 'year' and 'artists',
        nearest first. Empty when ``song_list`` is empty or the first seed
        song cannot be found locally or online.
    """
    metadata_cols = ['name', 'year', 'artists']

    if not song_list:
        return []

    # Seed names are compared case-insensitively against the dataset.
    seed_names = {song['name'].lower() for song in song_list}

    seed_data = get_song_data(song_list[0], spotify_data)
    if seed_data is None:
        print(f"Warning: {song_list[0]['name']} does not exist in Spotify or in database")
        return []

    # Works for both shapes get_song_data returns (Series or one-row
    # DataFrame): force a single float row.
    song_center = np.asarray(seed_data[number_cols].values, dtype=float).reshape(1, -1)

    scaler = get_scaler()[1]
    scaled_data = scaler.transform(spotify_data[number_cols])
    scaled_song_center = scaler.transform(song_center)
    distances = cdist(scaled_song_center, scaled_data, 'cosine')

    # Rank every row, drop the seed songs, then truncate. Filtering before
    # truncation keeps the result at n_songs when enough rows exist.
    ranked = np.argsort(distances[0])
    is_seed = spotify_data['name'].str.lower().isin(seed_names).to_numpy()
    top = ranked[~is_seed[ranked]][:n_songs]

    # Copy so that normalising 'name' never writes into the caller's frame.
    rec_songs = spotify_data.iloc[top].copy()
    rec_songs['name'] = rec_songs['name'].str.lower()
    return rec_songs[metadata_cols].to_dict(orient='records')