Upload 3 files
Browse files- app.py +62 -0
- artist_recommender.py +228 -0
- requirements.txt +6 -0
app.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import sys
|
4 |
+
sys.path.extend(["."])
|
5 |
+
import artist_recommender
|
6 |
+
|
7 |
+
def export_csv(ad_output):
    """Write the recommendations table to disk and expose it for download.

    Args:
        ad_output: The DataFrame currently shown in the results table.

    Returns:
        A visible ``gr.File`` component pointing at the written CSV.
    """
    csv_path = "output.csv"
    ad_output.to_csv(csv_path, index=False)
    download = gr.File(value=csv_path, visible=True)
    return download
|
10 |
+
|
11 |
+
def get_artist_recommendations(client_id, client_secret, client_redirect_uri, artist_list, create_playlist):
    """Run the full recommendation pipeline for the Gradio UI.

    Args:
        client_id: Spotify application client id.
        client_secret: Spotify application client secret.
        client_redirect_uri: Redirect URI registered for the Spotify application.
        artist_list: Comma-separated artist names; empty to use the user's
            recently played tracks instead.
        create_playlist: When true, also create a Spotify playlist from the
            recommended artists.

    Returns:
        The DataFrame of closest artists produced by
        ``artist_recommender.get_closest_artists``.

    Raises:
        gr.Error: If the Spotify client cannot be used with the supplied
            credentials, or if no reference tracks could be collected.
    """
    artist_list = artist_list or ""

    print("Initializing Spotify")
    creds = {
        "SPOTIPY_CLIENT_ID": client_id,
        "SPOTIPY_CLIENT_SECRET": client_secret,
        "SPOTIPY_REDIRECT_URI": client_redirect_uri,
    }
    # The credentials are an in-memory dict, not a path to a JSON file, so the
    # file-loading branch of initialize_spotify_client must be skipped.
    artist_recommender.initialize_spotify_client(creds, isfile=False)
    sp = artist_recommender.sp
    try:
        sp.current_user()
    except Exception as err:
        # Surface the failure in the UI instead of terminating the server
        # process, which is what sys.exit() would attempt from a handler.
        print("Failed to initialize Spotify, are credentials correct?")
        raise gr.Error("Failed to initialize Spotify, are credentials correct?") from err

    tracks = artist_recommender.get_recently_played(selected_artists=artist_list)
    if not tracks:
        # Without reference rows the feature columns do not exist and the
        # distance computation below would fail with a KeyError.
        raise gr.Error("No reference tracks could be found for the given input.")

    print("Getting reference features")
    reference_df = pd.DataFrame.from_records(tracks)
    print("Getting matching artist features")
    artist_features = artist_recommender.get_matching_artists(tracks)
    artist_df = pd.DataFrame(artist_features)
    reference_df['source'] = 'reference'
    artist_df['source'] = 'artist'
    df = pd.concat([reference_df, artist_df], ignore_index=True)
    closest_artists = artist_recommender.get_closest_artists(
        df, artist_recommender.MUSIC_FEATURES + artist_recommender.SHEET_FEATURES)
    closest_artists.to_csv("closest_artists.csv", index=False)
    if create_playlist:
        # generate_playlist joins the names it is given, so hand it a list of
        # names rather than the raw comma-separated string.
        selected_names = [name.strip() for name in artist_list.split(",") if name.strip()]
        artist_recommender.generate_playlist(closest_artists, selected_names)
    return closest_artists
|
39 |
+
|
40 |
+
# Gradio UI: credential inputs, optional artist list, a results table and a
# CSV export button.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Spotify Artist Recommender
        """
    )
    client_id = gr.Textbox(label="SPOTIFY_CLIENT_ID", value="")
    # Mask the secret so it is not displayed in plain text on screen.
    client_secret = gr.Textbox(label="SPOTIFY_CLIENT_SECRET", value="", type="password")
    client_redirect_uri = gr.Textbox(label="SPOTIFY_REDIRECT_URI", value="")
    artist_list = gr.Textbox(label="Artist list (Optional). Leave blank to use your recent activity. Otherwise a CSV of artists", value="")
    create_playlist = gr.Checkbox(label="Generate a playlist", value=True)
    # A Button's caption is its value (same form as the export button below).
    button = gr.Button("Get Artist Recommendations")
    output1 = gr.DataFrame(headers=['artist', 'artist_id', 'distance', 'artist_url'], interactive=False, wrap=True)
    button.click(fn=get_artist_recommendations,
                 inputs=[client_id, client_secret, client_redirect_uri, artist_list, create_playlist],
                 outputs=output1)

    export_button = gr.Button("Export Recommendations")
    # Hidden until export_csv returns a visible File component.
    csv = gr.File(interactive=False, visible=False)
    export_button.click(fn=export_csv, inputs=[output1], outputs=[csv])

if __name__ == '__main__':
    demo.launch()
|
artist_recommender.py
ADDED
@@ -0,0 +1,228 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import spotipy
|
3 |
+
from spotipy.oauth2 import SpotifyOAuth
|
4 |
+
import random
|
5 |
+
from sklearn.preprocessing import StandardScaler
|
6 |
+
import numpy as np
|
7 |
+
from scipy.spatial.distance import cosine
|
8 |
+
import json
|
9 |
+
from tqdm import tqdm
|
10 |
+
import argparse
|
11 |
+
import sys
|
12 |
+
from datetime import datetime
|
13 |
+
# Do not truncate column contents when DataFrames/Series are printed.
pd.set_option('display.max_colwidth', None)

# Module-wide Spotify client; stays None until initialize_spotify_client()
# assigns it. Every function below that talks to Spotify reads this global.
sp = None

# Names of the audio-feature fields used as the feature vector when comparing
# artists; main() passes MUSIC_FEATURES + SHEET_FEATURES to get_closest_artists().
MUSIC_FEATURES = ["danceability", "energy", "loudness", "speechiness", "acousticness", "instrumentalness", "liveness", "valence"]
SHEET_FEATURES = ["key", "mode", "tempo"]
|
19 |
+
|
20 |
+
def initialize_spotify_client(credentials_file, isfile=True):
    """Create the module-wide Spotify client ``sp`` from the given credentials.

    Args:
        credentials_file: Either a path to a JSON file or an already-loaded
            dict. Both must provide the keys ``SPOTIPY_CLIENT_ID``,
            ``SPOTIPY_CLIENT_SECRET`` and ``SPOTIPY_REDIRECT_URI``.
        isfile: When true (default) ``credentials_file`` is read as a JSON
            file path. A dict is always used directly, regardless of this
            flag, so callers holding in-memory credentials cannot trip over
            the default.

    Raises:
        OSError: If the credentials file cannot be opened.
        json.JSONDecodeError: If the credentials file is not valid JSON.
        KeyError: If a required credential key is missing.
    """
    global sp
    if isfile and not isinstance(credentials_file, dict):
        # Context manager so the file handle is closed deterministically.
        with open(credentials_file, encoding="utf-8") as handle:
            creds = json.load(handle)
    else:
        creds = credentials_file
    SPOTIPY_CLIENT_ID = creds['SPOTIPY_CLIENT_ID']
    SPOTIPY_CLIENT_SECRET = creds['SPOTIPY_CLIENT_SECRET']
    SPOTIPY_REDIRECT_URI = creds['SPOTIPY_REDIRECT_URI']
    # Scopes requested: create/modify public playlists and read listening history.
    SCOPE = 'playlist-modify-public user-read-recently-played'
    sp = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=SPOTIPY_CLIENT_ID,
                                                   client_secret=SPOTIPY_CLIENT_SECRET,
                                                   redirect_uri=SPOTIPY_REDIRECT_URI,
                                                   scope=SCOPE))
|
34 |
+
|
35 |
+
def find_artists_with_matching_genres(target_genres):
    """Search Spotify for artists sharing the given genres.

    The search starts with all genres combined; after each query one randomly
    chosen genre is dropped and the search is repeated, until at least 10
    distinct artists were collected or no genres remain.

    Args:
        target_genres: Iterable of genre names. It is copied, not modified.

    Returns:
        A list of dicts with keys ``id``, ``name`` and ``artist_url``, one per
        distinct artist id, in the order they were first returned.
    """
    remaining_genres = list(target_genres)
    matched_artists = []
    # Deduplicate on the artist id: the search result objects never compare
    # equal to the reduced dicts stored in matched_artists.
    seen_ids = set()
    while len(remaining_genres) > 0 and len(matched_artists) < 10:
        print(f"Trying {remaining_genres}")
        query = " AND ".join([f"genre:\"{genre}\"" for genre in remaining_genres])
        artist_results = sp.search(q=query, type='artist', limit=10)
        for artist in artist_results['artists']['items']:
            if artist['id'] in seen_ids:
                continue
            seen_ids.add(artist['id'])
            matched_artists.append({'id': artist['id'],
                                    'name': artist['name'],
                                    'artist_url': artist['external_urls']['spotify']})
        # Relax the query for the next round by dropping one genre at random.
        remaining_genres.remove(random.choice(remaining_genres))
    return matched_artists
|
46 |
+
|
47 |
+
def get_top_track_features(artist_id):
    """Fetch the audio features of an artist's first top track (US market).

    Args:
        artist_id: Spotify artist id.

    Returns:
        The audio-features dict returned by Spotify for the first top track,
        extended with ``track_id`` and ``track_name``. An empty dict is
        returned when the artist has no top tracks, when no audio features
        are available, or when any error occurs (the error is printed).
    """
    try:
        top_tracks = sp.artist_top_tracks(artist_id, country='US')['tracks']
        if not top_tracks:
            print("No top tracks found for this artist.")
            return {}
        top_track_id = top_tracks[0]['id']
        top_track_name = top_tracks[0]['name']
        audio_features = sp.audio_features([top_track_id])
        features = audio_features[0] if audio_features else None
        # Check before writing into the dict: a missing entry comes back as
        # None and must be reported as such, not as a generic error.
        if not features:
            print("Audio features could not be retrieved.")
            return {}
        features['track_id'] = top_track_id
        features['track_name'] = top_track_name
    except Exception as e:
        # Best effort: a failing artist is skipped by the callers.
        print(f"Error retrieving top track for artist: {e}")
        return {}
    return features
|
66 |
+
|
67 |
+
def average_cosine_distance(A, B):
    """Return, for each vector in ``B``, its mean cosine distance to all of ``A``.

    Args:
        A: Sequence of reference vectors.
        B: Sequence of vectors to score.

    Returns:
        A NumPy array with one mean distance per vector in ``B``.
    """
    return np.array([
        np.mean([cosine(candidate, reference) for reference in A])
        for candidate in B
    ])
|
73 |
+
|
74 |
+
def get_recently_played(limit=50, selected_artists = ""):
    """Collect reference artists and the audio features of their top track.

    Args:
        limit: Passed to ``sp.current_user_recently_played`` when no artists
            are supplied.
        selected_artists: Comma-separated artist names. When empty, the
            current user's recently played items are used instead.

    Returns:
        A list of at most 10 feature dicts, one per distinct artist. Each is
        the result of ``get_top_track_features`` extended with ``name``,
        ``artist_id`` and ``artist_url``.
    """
    if len(selected_artists) == 0:
        print("Getting most recently played artists")
        results = sp.current_user_recently_played(limit=limit)
    else:
        selected_artists = [x.strip() for x in selected_artists.split(",")]
        print(f"Using supplied list of artists (first 10 artists only)")
        # Build a structure with the same 'items' -> 'track' -> 'artists'
        # nesting that the loop below reads, so both sources share one code path.
        results = {'items':[]}
        for artist in selected_artists[:10]:
            artist_objects = sp.search(q=f"artist: {artist}", type='artist')
            try:
                # Keep only case-insensitive exact name matches, most popular first.
                artist_objects = sorted([x for x in artist_objects['artists']['items'] if x['name'].lower() == artist.lower()],
                                        key= lambda x: x['popularity'], reverse=True)
            except KeyError:
                artist_objects = []
            if len(artist_objects) == 0:
                print(f"{artist} not found")
                continue
            elif len(artist_objects) > 1:
                print(f"Multiple artist_id found for {artist}, selecting the most popular artist_id in the list.")
            artist_id = artist_objects[0]['id']
            try:
                artist_url = artist_objects[0]['external_urls']['spotify']
            except KeyError:
                artist_url = ""
            results['items'].append({
                'track': {
                    'artists': [{'id': artist_id,
                                 'name': artist,
                                 'external_urls': {'spotify': artist_url}}]}})
    tracks = []
    for idx, item in enumerate(results['items']):
        try:
            track = item['track']
            # Only the first listed artist of each track is considered.
            artist_id = track['artists'][0]['id']
        except KeyError as e:
            print(f"Failed index {idx}:")
            print(e)
            continue
        # Skip artists already collected; stop once 10 artists were gathered.
        if artist_id in [x['artist_id'] for x in tracks]:
            continue
        elif len(tracks) >= 10:
            break
        features = get_top_track_features(artist_id)
        # An empty dict means no usable features were obtained for this artist.
        if len(features.keys()) == 0:
            print(f"No features found for artist: {artist_id}")
            continue
        try:
            features['name'] = track['artists'][0]['name']
        except KeyError as e:
            print(f"Failed to get artists name for {artist_id}:")
            print(e)
            features['name'] = np.nan
        features['artist_id'] = artist_id
        try:
            features['artist_url'] = track['artists'][0]['external_urls']['spotify']
        except KeyError as e:
            print(f"Failed to get URL for artist {artist_id}:")
            print(e)
            features['artist_url'] = np.nan
        tracks.append(features)
        print(f"{idx+1}: {features['name']} - {features['track_name']}")
    return tracks
|
137 |
+
|
138 |
+
def get_matching_artists(tracks):
    """Find candidate artists that share genres with the reference artists.

    For every reference entry the artist's genres are looked up, artists with
    matching genres are searched, and the top-track audio features of each
    new candidate are collected.

    Args:
        tracks: List of reference feature dicts, each with an ``artist_id`` key
            (as produced by ``get_recently_played``).

    Returns:
        A list of feature dicts for candidate artists, each extended with
        ``artist_id``, ``name`` and ``artist_url``. Reference artists and
        already collected candidates are never included twice.
    """
    artist_features = []
    # Ids that must not be recommended: the reference artists themselves plus
    # every candidate collected so far.
    known_ids = {track['artist_id'] for track in tracks}
    # Wrapping the list directly lets tqdm display the total.
    for item in tqdm(tracks):
        reference_artist = sp.artist(item['artist_id'])
        genres = reference_artist.get('genres', [])
        if not genres:
            continue
        matched_artists = find_artists_with_matching_genres(list(genres))
        if len(matched_artists) <= 1:  # If it only managed to match itself
            continue
        for candidate in matched_artists:
            candidate_id = candidate.get('id', "")
            if candidate_id == "" or candidate_id in known_ids:
                continue
            features = get_top_track_features(candidate_id)
            if not features:
                continue
            features['artist_id'] = candidate_id
            features['name'] = candidate['name']
            features['artist_url'] = candidate['artist_url']
            artist_features.append(features)
            known_ids.add(candidate_id)
    return artist_features
|
162 |
+
|
163 |
+
def get_closest_artists(df, features, top_n=10):
    """Rank candidate artists by mean cosine distance to the reference rows.

    Args:
        df: DataFrame holding both reference and candidate rows. Must contain
            the ``features`` columns plus ``source`` (``"reference"`` or
            ``"artist"``), ``name``, ``artist_id`` and ``artist_url``.
        features: Column names to standardize and compare.
        top_n: Maximum number of artists to return (default 10).

    Returns:
        A DataFrame with columns ``artist``, ``artist_id``, ``distance`` and
        ``artist_url``, sorted by ascending distance and limited to ``top_n``
        rows. Its index is the candidates' index in ``df``.
    """
    all_feats = StandardScaler().fit_transform(df[features])
    # Boolean masks select rows by position, so this is correct for any index
    # on df, not only a default 0..n-1 range.
    is_reference = (df["source"] == "reference").to_numpy()
    is_candidate = (df["source"] == "artist").to_numpy()
    candidates = df[is_candidate]
    result_distances = average_cosine_distance(all_feats[is_reference], all_feats[is_candidate])
    results = pd.DataFrame({"artist": candidates['name'].values,
                            "artist_id": candidates['artist_id'].values,
                            "distance": result_distances,
                            "artist_url": candidates['artist_url'].values},
                           index=candidates.index)
    return results.sort_values("distance").head(top_n)
|
172 |
+
|
173 |
+
def generate_playlist(closest_artists, artists=None):
    """Create a Spotify playlist with the top track of each recommended artist.

    Args:
        closest_artists: DataFrame with an ``artist_id`` column, one row per
            recommended artist.
        artists: The artists the recommendation was based on, either as a list
            of names or as a comma-separated string. Only used for the
            playlist description; empty/None means "recently played".

    Returns:
        True when the created playlist reports a name and external URLs,
        False otherwise.
    """
    if artists is None:
        artists = []
    elif isinstance(artists, str):
        # Callers hold the raw comma-separated input; joining a string
        # directly would interleave ", " between its characters.
        artists = [name.strip() for name in artists.split(",") if name.strip()]

    playlist_name = f"Recommended_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    if len(artists) > 0:
        playlist_desc = f"Top 10 recommended artists based on supplied artists: {', '.join(artists)}"
    else:
        playlist_desc = "Top 10 recommended artists from recently played."
    user_id = sp.current_user()['id']
    new_playlist = sp.user_playlist_create(user_id, playlist_name, description=playlist_desc)
    playlist_id = new_playlist['id']

    track_uris = []
    for i, r in closest_artists.iterrows():
        print(r)
        top_tracks = sp.artist_top_tracks(r.artist_id, country='US')['tracks']
        if len(top_tracks) == 0:
            continue
        track_uris.append(top_tracks[0]['uri'])
    # Add tracks in batches instead of one request per track; 100 is the
    # per-request item limit of the add-items endpoint.
    for start in range(0, len(track_uris), 100):
        sp.playlist_add_items(playlist_id, track_uris[start:start + 100])

    if "name" in new_playlist and "external_urls" in new_playlist:
        print("Created playlist:", new_playlist['name'], "with URL:", new_playlist['external_urls']['spotify'])
        return True
    else:
        print("failed to create new playlist")
        return False
|
196 |
+
|
197 |
+
def main():
    """Command-line entry point: recommend artists and optionally build a playlist.

    Reads Spotify credentials from the JSON file given by ``--creds``, gathers
    reference artists (``--artists`` or the user's recently played tracks),
    writes the closest artists to ``closest_artists.csv`` and, with
    ``--playlist``, creates a Spotify playlist from them.

    Exits with status 1 when the Spotify client cannot be used or when no
    reference tracks could be collected.
    """
    parser = argparse.ArgumentParser(description="Spotify artist recommender. Requires a JSON with spotify credentials "
                                                 "(see credentials.json.example). Can also take a comma separated list "
                                                 "of artists instead of looking up last played.")
    parser.add_argument('--creds', type=str, help='Path to credentials json file', required=True)
    parser.add_argument('--artists', type=str, help='Comma separated list of artists', default="")
    parser.add_argument('--playlist', action='store_true', help='Create a Spotify playlist if set ("Recommended_timestamp")')
    args = parser.parse_args()

    print("Initializing Spotify")
    initialize_spotify_client(args.creds)
    try:
        sp.current_user()
    except Exception:
        # Exception (not a bare except) so Ctrl-C still interrupts; exit
        # non-zero so shells and scripts can detect the failure.
        print("Failed to initialize Spotify, are credentials correct?")
        sys.exit(1)
    tracks = get_recently_played(selected_artists=args.artists)
    if not tracks:
        # Without reference rows the feature columns do not exist and the
        # distance computation below would fail with a KeyError.
        print("No reference tracks could be found, nothing to recommend.")
        sys.exit(1)
    print("Getting reference features")
    reference_df = pd.DataFrame.from_records(tracks)
    print("Getting matching artist features")
    artist_features = get_matching_artists(tracks)
    artist_df = pd.DataFrame(artist_features)
    reference_df['source'] = 'reference'
    artist_df['source'] = 'artist'
    df = pd.concat([reference_df, artist_df], ignore_index=True)
    closest_artists = get_closest_artists(df, MUSIC_FEATURES + SHEET_FEATURES)
    closest_artists.to_csv("closest_artists.csv", index=False)
    if args.playlist:
        # generate_playlist joins the names it is given, so hand it a list of
        # names rather than the raw comma-separated string.
        selected_names = [name.strip() for name in args.artists.split(",") if name.strip()]
        generate_playlist(closest_artists, selected_names)


if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pandas
|
2 |
+
spotipy==2.23.0
|
3 |
+
scikit-learn
|
4 |
+
numpy
|
5 |
+
scipy
|
6 |
+
tqdm
|