gsliwoski committed on
Commit
c0b0603
1 Parent(s): 8d83c39

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +62 -0
  2. artist_recommender.py +228 -0
  3. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import sys
4
+ sys.path.extend(["."])
5
+ import artist_recommender
6
+
7
def export_csv(ad_output):
    """Write the recommendations table to ``output.csv`` and offer it for download.

    Args:
        ad_output: Table currently shown in the results grid. It must expose
            a ``to_csv`` method (presumably a pandas DataFrame handed over by
            Gradio -- confirm against the Gradio version in use).

    Returns:
        A visible ``gr.File`` component pointing at the written CSV file.
    """
    csv_path = "output.csv"
    ad_output.to_csv(csv_path, index=False)
    return gr.File(value=csv_path, visible=True)
10
+
11
def get_artist_recommendations(client_id, client_secret, client_redirect_uri, artist_list, create_playlist):
    """Gradio callback: compute the closest artists and optionally build a playlist.

    Args:
        client_id: Spotify application client id typed into the form.
        client_secret: Spotify application client secret typed into the form.
        client_redirect_uri: Redirect URI registered for the Spotify application.
        artist_list: Comma-separated artist names; blank means "use the
            user's recently played tracks".
        create_playlist: When truthy, also create a Spotify playlist from the
            recommended artists.

    Returns:
        The DataFrame produced by ``artist_recommender.get_closest_artists``.
        The same table is written to ``closest_artists.csv``.

    Raises:
        gr.Error: If the Spotify client cannot be initialized/authenticated,
            or if no reference tracks could be collected.
    """
    print("Initializing Spotify")
    creds = {
        "SPOTIPY_CLIENT_ID": client_id,
        "SPOTIPY_CLIENT_SECRET": client_secret,
        "SPOTIPY_REDIRECT_URI": client_redirect_uri,
    }
    artist_list = artist_list or ""
    try:
        # The credentials are an in-memory dict, not a path to a JSON file,
        # so the loader must be told not to open() them.
        artist_recommender.initialize_spotify_client(creds, isfile=False)
        artist_recommender.sp.current_user()
    except Exception as exc:
        # Report the problem in the UI rather than calling sys.exit() from
        # inside a request handler.
        print("Failed to initialize Spotify, are credentials correct?")
        raise gr.Error("Failed to initialize Spotify, are credentials correct?") from exc

    tracks = artist_recommender.get_recently_played(selected_artists=artist_list)
    if not tracks:
        # Without reference rows the feature columns do not exist and the
        # distance computation below would fail with a KeyError.
        raise gr.Error("No reference tracks could be found for the requested artists.")

    print("Getting reference features")
    reference_df = pd.DataFrame.from_records(tracks)
    print("Getting matching artist features")
    artist_features = artist_recommender.get_matching_artists(tracks)
    artist_df = pd.DataFrame(artist_features)
    reference_df['source'] = 'reference'
    artist_df['source'] = 'artist'
    df = pd.concat([reference_df, artist_df], ignore_index=True)
    closest_artists = artist_recommender.get_closest_artists(
        df, artist_recommender.MUSIC_FEATURES + artist_recommender.SHEET_FEATURES)
    closest_artists.to_csv("closest_artists.csv", index=False)
    if create_playlist:
        # generate_playlist joins the names with ", "; give it a list of
        # names so it does not join the individual characters of the string.
        requested_artists = [name.strip() for name in artist_list.split(",") if name.strip()]
        artist_recommender.generate_playlist(closest_artists, requested_artists)
    return closest_artists
39
+
40
# Gradio front end: credential inputs, an optional artist list, the results
# grid and a CSV export button.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Spotify Artist Recommender
        """
    )
    client_id = gr.Textbox(label="SPOTIFY_CLIENT_ID", value="")
    # Mask the secret while it is being typed.
    client_secret = gr.Textbox(label="SPOTIFY_CLIENT_SECRET", value="", type="password")
    client_redirect_uri = gr.Textbox(label="SPOTIFY_REDIRECT_URI", value="")
    artist_list = gr.Textbox(label="Artist list (Optional). Leave blank to use your recent activity. Otherwise a CSV of artists", value="")
    create_playlist = gr.Checkbox(label="Generate a playlist", value=True)
    # The caption is the button's value (first positional argument), exactly
    # as the export button below is built.
    button = gr.Button("Get Artist Recommendations")
    output1 = gr.DataFrame(headers=['artist', 'artist_id', 'distance', 'artist_url'], interactive=False, wrap=True)
    button.click(fn=get_artist_recommendations,
                 inputs=[client_id, client_secret, client_redirect_uri, artist_list, create_playlist],
                 outputs=output1)

    export_button = gr.Button("Export Recommendations")
    # Hidden until export_csv returns a visible gr.File for it.
    csv_file = gr.File(interactive=False, visible=False)
    export_button.click(fn=export_csv, inputs=[output1], outputs=[csv_file])

if __name__ == '__main__':
    demo.launch()
artist_recommender.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import spotipy
3
+ from spotipy.oauth2 import SpotifyOAuth
4
+ import random
5
+ from sklearn.preprocessing import StandardScaler
6
+ import numpy as np
7
+ from scipy.spatial.distance import cosine
8
+ import json
9
+ from tqdm import tqdm
10
+ import argparse
11
+ import sys
12
+ from datetime import datetime
13
+ pd.set_option('display.max_colwidth', None)
14
+
15
# Module-wide Spotify client; None until initialize_spotify_client() assigns it.
sp = None

# Column names of the per-track audio features used for the distance
# computation. Callers concatenate the two lists into one feature set.
MUSIC_FEATURES = [
    "danceability",
    "energy",
    "loudness",
    "speechiness",
    "acousticness",
    "instrumentalness",
    "liveness",
    "valence",
]
SHEET_FEATURES = [
    "key",
    "mode",
    "tempo",
]
19
+
20
def initialize_spotify_client(credentials_file, isfile=True):
    """Create the module-level Spotify client ``sp`` from a set of credentials.

    Args:
        credentials_file: Path to a JSON file, or an already loaded dict.
            Either way the data must contain the keys ``SPOTIPY_CLIENT_ID``,
            ``SPOTIPY_CLIENT_SECRET`` and ``SPOTIPY_REDIRECT_URI``.
        isfile: When true (default) ``credentials_file`` is read as a JSON
            file. A dict is always used directly, whatever this flag says,
            so callers that pass a dict without ``isfile=False`` still work.

    Raises:
        KeyError: If one of the required credential keys is missing.
        OSError: If the credentials file cannot be opened.
    """
    global sp
    if isfile and not isinstance(credentials_file, dict):
        # Context manager so the file handle is closed after parsing.
        with open(credentials_file, encoding="utf-8") as handle:
            creds = json.load(handle)
    else:
        creds = credentials_file
    SPOTIPY_CLIENT_ID = creds['SPOTIPY_CLIENT_ID']
    SPOTIPY_CLIENT_SECRET = creds['SPOTIPY_CLIENT_SECRET']
    SPOTIPY_REDIRECT_URI = creds['SPOTIPY_REDIRECT_URI']
    # Scopes requested for creating public playlists and for reading the
    # recently played history.
    SCOPE = 'playlist-modify-public user-read-recently-played'
    sp = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=SPOTIPY_CLIENT_ID,
                                                   client_secret=SPOTIPY_CLIENT_SECRET,
                                                   redirect_uri=SPOTIPY_REDIRECT_URI,
                                                   scope=SCOPE))
34
+
35
def find_artists_with_matching_genres(target_genres):
    """Search Spotify for artists that share the given genres.

    The first query requires every genre. After each query one randomly
    chosen genre is dropped and the search is repeated, until at least 10
    distinct artists were collected or no genre is left.

    Args:
        target_genres: Iterable of genre names. It is copied, so the
            caller's object is left untouched.

    Returns:
        A list of dicts with the keys ``id``, ``name`` and ``artist_url``,
        one per distinct artist id, in the order they were found.
    """
    remaining_genres = list(target_genres)
    matched_artists = []
    # Ids already collected. The result dicts are reduced copies of the API
    # objects, so duplicates have to be detected by id rather than by
    # comparing whole objects.
    seen_ids = set()
    while remaining_genres and len(matched_artists) < 10:
        print(f"Trying {remaining_genres}")
        query = " AND ".join(f'genre:"{genre}"' for genre in remaining_genres)
        artist_results = sp.search(q=query, type='artist', limit=10)
        for artist in artist_results['artists']['items']:
            if artist['id'] in seen_ids:
                continue
            seen_ids.add(artist['id'])
            matched_artists.append({'id': artist['id'],
                                    'name': artist['name'],
                                    'artist_url': artist['external_urls']['spotify']})
        # Relax the query for the next round.
        remaining_genres.remove(random.choice(remaining_genres))
    return matched_artists
46
+
47
def get_top_track_features(artist_id):
    """Return the audio features of an artist's first listed top track (US market).

    Args:
        artist_id: Spotify artist id.

    Returns:
        The audio-features dict of the first entry returned by
        ``sp.artist_top_tracks``, extended with ``track_id`` and
        ``track_name``. An empty dict is returned when the artist has no top
        tracks, when no audio features are available, or when any API call
        fails; the reason is printed in each case.
    """
    try:
        top_tracks = sp.artist_top_tracks(artist_id, country='US')['tracks']
        if not top_tracks:
            print("No top tracks found for this artist.")
            return {}
        top_track = top_tracks[0]
        features = sp.audio_features([top_track['id']])[0]
        # Check before annotating: a missing result cannot take the extra
        # keys, and the message below is the one meant for this case.
        if not features:
            print("Audio features could not be retrieved.")
            return {}
        features['track_id'] = top_track['id']
        features['track_name'] = top_track['name']
        return features
    except Exception as e:
        # Best effort: one failing artist must not abort the whole run.
        print(f"Error retrieving top track for artist: {e}")
        return {}
66
+
67
def average_cosine_distance(A, B):
    """Return, for every row of ``B``, its mean cosine distance to the rows of ``A``.

    Args:
        A: Reference vectors, one per row (2-D array-like).
        B: Candidate vectors, one per row (2-D array-like).

    Returns:
        A 1-D ``numpy`` array with one value per row of ``B``. It is empty
        when ``B`` is empty, and all-NaN when ``A`` is empty because there is
        nothing to average over.
    """
    references = np.asarray(A, dtype=float)
    candidates = np.asarray(B, dtype=float)
    if candidates.size == 0:
        return np.array([])
    if references.size == 0:
        # Explicit NaN instead of np.mean([]) and its RuntimeWarning.
        return np.full(len(candidates), np.nan)
    return np.array([
        np.mean([cosine(candidate, reference) for reference in references])
        for candidate in candidates
    ])
73
+
74
def _lookup_selected_artists(artist_names, max_artists=10):
    """Resolve artist names to items shaped like recently-played entries."""
    items = []
    for artist in artist_names[:max_artists]:
        # NOTE(review): the query has a space after "artist:"; confirm
        # against the Spotify search syntax that the field filter applies.
        search_result = sp.search(q=f"artist: {artist}", type='artist')
        try:
            # Keep exact (case-insensitive) name matches, most popular first.
            artist_objects = sorted(
                [x for x in search_result['artists']['items'] if x['name'].lower() == artist.lower()],
                key=lambda x: x['popularity'], reverse=True)
        except KeyError:
            artist_objects = []
        if not artist_objects:
            print(f"{artist} not found")
            continue
        if len(artist_objects) > 1:
            print(f"Multiple artist_id found for {artist}, selecting the most popular artist_id in the list.")
        best_match = artist_objects[0]
        try:
            artist_url = best_match['external_urls']['spotify']
        except KeyError:
            artist_url = ""
        items.append({
            'track': {
                'artists': [{'id': best_match['id'],
                             'name': artist,
                             'external_urls': {'spotify': artist_url}}]}})
    return items


def get_recently_played(limit=50, selected_artists="", max_artists=10):
    """Collect top-track audio features for the reference artists.

    The reference artists are either the lead artists of the user's recently
    played tracks, or the artists named in ``selected_artists``.

    Args:
        limit: Number of recently played items requested from Spotify.
        selected_artists: Comma-separated artist names, or a list of names.
            Blank names (for example from a trailing comma) are ignored; if
            nothing remains, the recently played history is used.
        max_artists: Maximum number of reference artists to return, and the
            number of supplied names that are looked up.

    Returns:
        A list of feature dicts as returned by ``get_top_track_features``,
        each extended with ``name``, ``artist_id`` and ``artist_url``. There
        is one entry per distinct artist.
    """
    if isinstance(selected_artists, str):
        requested = [name.strip() for name in selected_artists.split(",")]
    else:
        requested = [str(name).strip() for name in (selected_artists or [])]
    requested = [name for name in requested if name]

    if not requested:
        print("Getting most recently played artists")
        results = sp.current_user_recently_played(limit=limit)
    else:
        print(f"Using supplied list of artists (first {max_artists} artists only)")
        results = {'items': _lookup_selected_artists(requested, max_artists)}

    tracks = []
    seen_artist_ids = set()
    for idx, item in enumerate(results['items']):
        try:
            track = item['track']
            lead_artist = track['artists'][0]
            artist_id = lead_artist['id']
        except (KeyError, IndexError, TypeError) as e:
            # Also covers a missing track object and an empty artists list.
            print(f"Failed index {idx}:")
            print(e)
            continue
        if artist_id in seen_artist_ids:
            continue
        if len(tracks) >= max_artists:
            break
        features = get_top_track_features(artist_id)
        if not features:
            print(f"No features found for artist: {artist_id}")
            continue
        try:
            features['name'] = lead_artist['name']
        except KeyError as e:
            print(f"Failed to get artists name for {artist_id}:")
            print(e)
            features['name'] = np.nan
        features['artist_id'] = artist_id
        try:
            features['artist_url'] = lead_artist['external_urls']['spotify']
        except KeyError as e:
            print(f"Failed to get URL for artist {artist_id}:")
            print(e)
            features['artist_url'] = np.nan
        tracks.append(features)
        seen_artist_ids.add(artist_id)
        print(f"{idx+1}: {features['name']} - {features['track_name']}")
    return tracks
137
+
138
def get_matching_artists(tracks):
    """Collect top-track features for artists that share genres with the references.

    Args:
        tracks: Reference entries; each must contain an ``artist_id`` key.

    Returns:
        A list of feature dicts (see ``get_top_track_features``), each
        extended with ``artist_id``, ``name`` and ``artist_url``. Reference
        artists and artists already collected are never included.
    """
    artist_features = []
    # Every id that must not be added again: the references themselves plus
    # all candidates accepted so far.
    known_ids = {item['artist_id'] for item in tracks}
    # Wrap the list itself so tqdm knows the total.
    for item in tqdm(tracks):
        reference_artist = sp.artist(item['artist_id'])
        genres = reference_artist.get('genres', [])
        if not genres:
            continue
        matched_artists = find_artists_with_matching_genres(list(genres))
        if len(matched_artists) <= 1:  # If it only managed to match itself
            continue
        for candidate in matched_artists:
            candidate_id = candidate.get('id', "")
            if candidate_id == "" or candidate_id in known_ids:
                continue
            features = get_top_track_features(candidate_id)
            if not features:
                continue
            features['artist_id'] = candidate_id
            features['name'] = candidate['name']
            features['artist_url'] = candidate['artist_url']
            artist_features.append(features)
            known_ids.add(candidate_id)
    return artist_features
162
+
163
def get_closest_artists(df, features, top_n=10):
    """Rank candidate artists by mean cosine distance to the reference artists.

    The feature columns of all rows are standardized together. Each row
    whose ``source`` is ``"artist"`` is then scored by its average cosine
    distance to the rows whose ``source`` is ``"reference"``.

    Args:
        df: Table with a ``source`` column, the columns named in
            ``features``, and ``name``, ``artist_id`` and ``artist_url``.
        features: List of column names used as the feature vector.
        top_n: Number of closest candidates to return.

    Returns:
        A DataFrame with the columns ``artist``, ``artist_id``, ``distance``
        and ``artist_url``, sorted by ascending distance and truncated to
        ``top_n`` rows. Its index labels are those of the candidate rows in
        ``df``.
    """
    all_feats = StandardScaler().fit_transform(df[features])
    # Boolean masks select rows by position, so the result is right even
    # when df does not have a default 0..n-1 index.
    is_reference = (df["source"] == "reference").to_numpy()
    is_candidate = (df["source"] == "artist").to_numpy()
    result_distances = average_cosine_distance(all_feats[is_reference], all_feats[is_candidate])
    candidates = df.loc[is_candidate]
    results = pd.DataFrame({"artist": candidates['name'].values,
                            "artist_id": candidates['artist_id'].values,
                            "distance": result_distances,
                            'artist_url': candidates['artist_url'].values},
                           index=candidates.index)
    return results.sort_values("distance").head(top_n)
172
+
173
def generate_playlist(closest_artists, artists=None):
    """Create a Spotify playlist holding the first top track of each recommended artist.

    Args:
        closest_artists: DataFrame with an ``artist_id`` column, one row per
            recommended artist.
        artists: Artists the recommendation was based on, either a list of
            names or a comma-separated string. Only used for the playlist
            description; empty or ``None`` means "recently played".

    Returns:
        True if the created playlist reports a name and external URLs,
        False otherwise.
    """
    if artists is None:
        artist_names = []
    elif isinstance(artists, str):
        # Split first; joining a plain string would interleave ", " between
        # its individual characters.
        artist_names = [name.strip() for name in artists.split(",") if name.strip()]
    else:
        artist_names = [str(name) for name in artists]

    playlist_name = f"Recommended_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    if artist_names:
        playlist_desc = f"Top 10 recommended artists based on supplied artists: {', '.join(artist_names)}"
    else:
        playlist_desc = "Top 10 recommended artists from recently played."
    user_id = sp.current_user()['id']
    new_playlist = sp.user_playlist_create(user_id, playlist_name, description=playlist_desc)
    playlist_id = new_playlist['id']

    track_uris = []
    for _, row in closest_artists.iterrows():
        top_tracks = sp.artist_top_tracks(row.artist_id, country='US')['tracks']
        if top_tracks:
            track_uris.append(top_tracks[0]['uri'])
    # Add the tracks in batches instead of one request per track.
    # NOTE(review): 100 per request is the documented Spotify limit -- confirm.
    for start in range(0, len(track_uris), 100):
        sp.playlist_add_items(playlist_id, track_uris[start:start + 100])

    if "name" in new_playlist and "external_urls" in new_playlist:
        print("Created playlist:", new_playlist['name'], "with URL:", new_playlist['external_urls']['spotify'])
        return True
    print("failed to create new playlist")
    return False
196
+
197
def main():
    """Command-line entry point: write ``closest_artists.csv`` and optionally a playlist.

    Options:
        --creds: Path to the credentials JSON file (required).
        --artists: Comma-separated artist names; blank uses recently played.
        --playlist: Also create a Spotify playlist from the recommendations.

    Exits with status 1 when Spotify cannot be reached with the supplied
    credentials or when no reference tracks could be collected.
    """
    parser = argparse.ArgumentParser(description="Spotify artist recommender. Requires a JSON with spotify credentials "
                                                 "(see credentials.json.example). Can also take a comma separated list "
                                                 "of artists instead of looking up last played.")
    parser.add_argument('--creds', type=str, help='Path to credentials json file', required=True)
    parser.add_argument('--artists', type=str, help='Comma separated list of artists', default="")
    parser.add_argument('--playlist', action='store_true', help='Create a Spotify playlist if set ("Recommended_timestamp")')
    args = parser.parse_args()

    print("Initializing Spotify")
    initialize_spotify_client(args.creds)
    try:
        sp.current_user()
    except Exception as exc:
        print(f"Failed to initialize Spotify, are credentials correct? ({exc})")
        # Non-zero status so callers can tell that the run failed.
        sys.exit(1)
    tracks = get_recently_played(selected_artists=args.artists)
    if not tracks:
        # Without reference rows the feature columns do not exist and the
        # distance computation below would fail with a KeyError.
        print("No reference tracks could be found, nothing to recommend.")
        sys.exit(1)
    print("Getting reference features")
    reference_df = pd.DataFrame.from_records(tracks)
    print("Getting matching artist features")
    artist_features = get_matching_artists(tracks)
    artist_df = pd.DataFrame(artist_features)
    reference_df['source'] = 'reference'
    artist_df['source'] = 'artist'
    df = pd.concat([reference_df, artist_df], ignore_index=True)
    closest_artists = get_closest_artists(df, MUSIC_FEATURES + SHEET_FEATURES)
    closest_artists.to_csv("closest_artists.csv", index=False)
    if args.playlist:
        # generate_playlist joins the names with ", "; give it a list of
        # names so it does not join the individual characters of the string.
        requested_artists = [name.strip() for name in args.artists.split(",") if name.strip()]
        generate_playlist(closest_artists, requested_artists)


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ pandas
2
+ spotipy==2.23.0
3
+ scikit-learn
4
+ numpy
5
+ scipy
6
+ tqdm