Spaces:
Runtime error
Runtime error
nandovallec
committed on
Commit
·
51245ea
1
Parent(s):
c9bd358
Initial
Browse files- fetchPlaylistTrackUris.py +58 -0
- model/dict_tid2uri.pkl +3 -0
- model/dict_uri2tid.pkl +3 -0
- recommender.py +81 -0
fetchPlaylistTrackUris.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import base64
|
3 |
+
import json
|
4 |
+
import os
|
5 |
+
import sys
|
6 |
+
|
7 |
+
# Spotify API credentials, read from the environment when the module is
# imported. A missing variable raises KeyError right here.
client_id = os.environ["CLIENT_ID"]
client_secret = os.environ["CLIENT_SECRET"]
|
9 |
+
|
10 |
+
def get_playlist_track_uris(playlist_id):
    """Return the track URIs of the Spotify playlist ``playlist_id``.

    Requests an access token with the module-level ``client_id`` and
    ``client_secret``, fetches the playlist object, and collects the ``uri``
    of every entry under ``tracks.items``. The resulting list is also
    printed, which is the only output the command-line entry point produces.

    Entries whose ``track`` field is missing or null are skipped rather than
    raising ``TypeError``.

    NOTE(review): only the items embedded in the playlist response are read;
    presumably that is just the first page of a long playlist -- confirm
    against the Spotify Web API paging documentation.
    """
    access_token = get_access_token(client_id, client_secret)
    playlist_data = get_playlist_data(access_token, playlist_id)

    track_uris = [
        item['track']['uri']
        for item in playlist_data['tracks']['items']
        if item.get('track')
    ]
    print(track_uris)

    return track_uris
|
27 |
+
|
28 |
+
|
29 |
+
|
30 |
+
def get_access_token(client_id, client_secret) -> str:
    """Request an access token from the Spotify accounts service.

    Sends a ``client_credentials`` grant to
    ``https://accounts.spotify.com/api/token``, authenticating with HTTP
    Basic auth built from ``client_id`` and ``client_secret``.

    Returns the ``access_token`` field of the JSON response.

    Raises ``requests.HTTPError`` when the service answers with an error
    status, so bad credentials are reported as such instead of as a
    ``KeyError`` on the missing ``access_token`` field.
    """
    credentials = (client_id + ':' + client_secret).encode('ascii')
    base64_string = base64.b64encode(credentials).decode('ascii')

    auth_headers = {
        'Authorization': 'Basic ' + base64_string,
        'Content-type': 'application/x-www-form-urlencoded',
    }
    auth_data = {'grant_type': 'client_credentials'}

    # The body is form-encoded via ``data``; the former ``json=True`` argument
    # was ignored by requests whenever ``data`` was supplied, so it is dropped.
    auth_response = requests.post(
        'https://accounts.spotify.com/api/token',
        headers=auth_headers,
        data=auth_data,
        timeout=30,  # do not hang forever on an unresponsive server
    )
    auth_response.raise_for_status()

    return auth_response.json()['access_token']
|
43 |
+
|
44 |
+
def get_playlist_data(access_token, playlist_id):
    """Fetch a playlist object from the Spotify Web API.

    Issues ``GET https://api.spotify.com/v1/playlists/<playlist_id>`` with
    ``access_token`` as a Bearer token and returns the decoded JSON body.

    Raises ``requests.HTTPError`` when the API answers with an error status
    (for example an unknown playlist id or an expired token), so an error
    payload is never returned as if it were playlist data.
    """
    get_playlist_headers = {
        'Authorization': 'Bearer ' + access_token,
        'Content-Type': 'application/json',
    }

    playlist_response = requests.get(
        'https://api.spotify.com/v1/playlists/' + playlist_id,
        headers=get_playlist_headers,
        timeout=30,  # do not hang forever on an unresponsive server
    )
    playlist_response.raise_for_status()

    return playlist_response.json()
|
54 |
+
|
55 |
+
if __name__ == "__main__":
    # Command-line entry point: the first argument is the playlist id.
    # Exit with a usage message instead of an IndexError traceback when it
    # is missing; any further arguments are ignored, as before.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <playlist_id>".format(sys.argv[0]))

    get_playlist_track_uris(sys.argv[1])
|
model/dict_tid2uri.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b52797435b4c60789b15afd28f846064645898376cfd3e4aabc36609770477cb
|
3 |
+
size 30017867
|
model/dict_uri2tid.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85fe3ebd1c087df637f92f561c48f8de71f3edee0dc357a42e60fb906f3c88cf
|
3 |
+
size 30017867
|
recommender.py
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
import sklearn.preprocessing as pp
|
3 |
+
from scipy.sparse import csr_matrix
|
4 |
+
import numpy as np
|
5 |
+
import pandas as pd
|
6 |
+
|
7 |
+
|
8 |
+
def inference_row(list_tid, ps_matrix):
    """Score one list of track ids against every row of ``ps_matrix``.

    A 1 x n_songs sparse row is built with a one at each column listed in
    ``list_tid`` (n_songs is the column count of ``ps_matrix``). That row and
    the rows of ``ps_matrix`` are each passed through
    ``pp.normalize(..., axis=1)`` and then multiplied together.

    Returns a tuple ``(scores, sparse_row)``: the product of the normalised
    row with the transposed normalised matrix, and the un-normalised row.
    """
    normalised_matrix = pp.normalize(ps_matrix, axis=1)

    track_count = len(list_tid)
    song_count = ps_matrix.shape[1]

    # COO-style construction: (values, (row indices, column indices)).
    values = np.ones(track_count)
    row_indices = np.zeros(track_count)
    sparse_row = csr_matrix(
        (values, (row_indices, list_tid)),
        shape=(1, song_count),
    )
    normalised_row = pp.normalize(sparse_row, axis=1)

    scores = normalised_row * normalised_matrix.T
    return scores, sparse_row
|
16 |
+
|
17 |
+
|
18 |
+
def get_best_tid(current_list, ps_matrix_row, K=50, MAX_tid=10):
    """Extend ``current_list`` with tracks taken from the most similar rows.

    Every row of ``ps_matrix_row`` is scored against ``current_list`` with
    ``inference_row``. The rows are ranked by decreasing score, the
    top-ranked row is skipped, and the next ``K`` rows are visited in order.
    For each visited row the ``tid`` entry of the matching row of
    ``model/df_ps_train_new.hdf`` is appended to the running list, with
    duplicates removed and first-seen order kept, until the list holds
    ``MAX_tid`` ids.

    Parameters:
        current_list: seed track ids; they come first in the result.
        ps_matrix_row: matrix handed to ``inference_row``.
        K: maximum number of ranked rows to draw tracks from.
        MAX_tid: maximum length of the returned list.

    Returns:
        A list of at most ``MAX_tid`` unique track ids.

    Fewer than ``K`` candidate rows (a small matrix, or ``K=0``) no longer
    raises ``IndexError``; the loop simply ends when candidates run out.

    NOTE(review): the ``tid`` column presumably stores one list of track ids
    per playlist, and the skipped top-ranked row is presumably meant to be
    the query playlist itself -- confirm both against how the model files
    are built.
    """
    df_ps_train = pd.read_hdf('model/df_ps_train_new.hdf')
    sim_vector, _ = inference_row(current_list, ps_matrix_row)
    sim_vector = sim_vector.toarray()[0].tolist()

    # Row indices by decreasing score. sorted() is stable, so rows with equal
    # scores keep their original relative order.
    ranked_pids = sorted(
        range(len(sim_vector)),
        key=sim_vector.__getitem__,
        reverse=True,
    )
    topK_pid = ranked_pids[1:K + 1]

    # dict.fromkeys removes duplicates while preserving first-seen order.
    best_tid = list(dict.fromkeys(current_list))
    for top_pid in topK_pid:
        if len(best_tid) >= MAX_tid:
            # Already full: later rows could only add ids that get cut off.
            break
        add_tid_list = df_ps_train.loc[top_pid].tid
        best_tid = list(dict.fromkeys(best_tid + list(add_tid_list)))

    return best_tid[:MAX_tid]
|
58 |
+
|
59 |
+
|
60 |
+
def inference_from_tid(list_tid, K=50, MAX_tid=10):
    """Recommend track ids for ``list_tid`` using the pickled model matrix.

    Loads ``model/giantMatrix_new.pickle``, converts it to CSR once, and
    delegates to ``get_best_tid`` with the same ``K`` and ``MAX_tid``.

    Returns whatever ``get_best_tid`` returns.
    """
    pickle_path = 'model/giantMatrix_new.pickle'

    # NOTE: pickle.load can execute arbitrary code; this must only ever be
    # pointed at a trusted model file shipped with the application.
    with open(pickle_path, 'rb') as f:
        ps_matrix = pickle.load(f)

    # Convert once and reuse; the conversion used to be performed twice, with
    # the first result discarded.
    ps_matrix_row = ps_matrix.tocsr()

    return get_best_tid(list_tid, ps_matrix_row, K, MAX_tid)
|
70 |
+
|
71 |
+
|
72 |
+
def inference_from_uri(list_uri, K=50, MAX_tid=10):
    """Recommend track URIs for the URIs in ``list_uri``.

    URIs are mapped to track ids through ``model/dict_uri2tid.pkl``; URIs
    absent from that mapping are dropped. The ids are passed to
    ``inference_from_tid`` and the ids it returns are mapped back to URIs
    through ``model/dict_tid2uri.pkl``.

    Returns the list of recommended URIs, in the order produced by
    ``inference_from_tid``.
    """
    with open('model/dict_uri2tid.pkl', 'rb') as uri2tid_file:
        uri_to_tid = pickle.load(uri2tid_file)

    # Keep only the URIs the model knows about.
    known_tids = []
    for uri in list_uri:
        if uri in uri_to_tid:
            known_tids.append(uri_to_tid[uri])

    recommended_tids = inference_from_tid(known_tids, K, MAX_tid)

    with open('model/dict_tid2uri.pkl', 'rb') as tid2uri_file:
        tid_to_uri = pickle.load(tid2uri_file)

    return [tid_to_uri[tid] for tid in recommended_tids]
|