nandovallec committed on
Commit
51245ea
·
1 Parent(s): c9bd358
fetchPlaylistTrackUris.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import base64
3
+ import json
4
+ import os
5
+ import sys
6
+
7
# Spotify API credentials are read from the environment at import time;
# a missing variable raises KeyError as soon as the module is loaded.
client_id = os.environ["CLIENT_ID"]
client_secret = os.environ["CLIENT_SECRET"]
9
+
10
def get_playlist_track_uris(playlist_id):
    """Fetch a Spotify playlist and return the URIs of its tracks.

    An access token is requested with the module-level ``client_id`` and
    ``client_secret``, the playlist is downloaded with ``get_playlist_data``
    and the ``uri`` of every item's ``track`` is collected. The resulting
    list is printed to stdout (this is the script's output when run from the
    command line) and also returned.

    Args:
        playlist_id: Spotify playlist ID; it is appended to the playlists
            endpoint URL by ``get_playlist_data``.

    Returns:
        list: track URIs in the order the playlist items were returned.

    Raises:
        KeyError: if the response has no ``tracks``/``items`` structure.
    """
    access_token = get_access_token(client_id, client_secret)
    playlist_data = get_playlist_data(access_token, playlist_id)

    # The Spotify Web API can return a null ``track`` for a playlist item
    # (e.g. a track that is no longer available). Skip those entries instead
    # of failing with "TypeError: 'NoneType' object is not subscriptable".
    track_uris = [
        item['track']['uri']
        for item in playlist_data['tracks']['items']
        if item.get('track')
    ]
    print(track_uris)

    return track_uris
27
+
28
+
29
+
30
def get_access_token(client_id, client_secret) -> str:
    """Request an access token using the client-credentials grant.

    Posts ``grant_type=client_credentials`` as form data to
    ``https://accounts.spotify.com/api/token`` with an HTTP Basic
    ``Authorization`` header built from ``client_id:client_secret``.

    Args:
        client_id: application client ID (ASCII).
        client_secret: application client secret (ASCII).

    Returns:
        The ``access_token`` value from the JSON response.

    Raises:
        requests.HTTPError: if the token endpoint answers with a 4xx/5xx
            status (e.g. wrong credentials).
        requests.RequestException: on connection problems or timeout.
        KeyError: if a successful response carries no ``access_token``.
    """
    credentials = (client_id + ':' + client_secret).encode('ascii')
    base64_string = base64.b64encode(credentials).decode('ascii')

    auth_headers = {
        'Authorization': 'Basic ' + base64_string,
        'Content-type': 'application/x-www-form-urlencoded',
    }
    auth_data = {'grant_type': 'client_credentials'}

    # The former ``json=True`` argument was dropped: Requests ignores ``json``
    # whenever ``data`` is supplied, so it never had any effect.
    auth_response = requests.post(
        'https://accounts.spotify.com/api/token',
        headers=auth_headers,
        data=auth_data,
        timeout=30,  # never hang forever on an unresponsive server
    )
    # Surface authentication failures as an HTTP error rather than as a
    # confusing KeyError on the missing 'access_token' field.
    auth_response.raise_for_status()

    return auth_response.json()['access_token']
43
+
44
def get_playlist_data(access_token, playlist_id):
    """Download a playlist object from the Spotify Web API.

    Performs ``GET https://api.spotify.com/v1/playlists/<playlist_id>`` with
    a Bearer token. The playlist's ``tracks`` member is a paging object that
    only holds the first page of items; while it advertises a ``next`` URL
    the following pages are fetched and their items appended, so the
    returned ``tracks['items']`` covers the whole playlist.

    Args:
        access_token: OAuth access token, as returned by the token endpoint.
        playlist_id: Spotify playlist ID.

    Returns:
        dict: the decoded JSON playlist object, with all track pages merged
        into ``tracks['items']`` and ``tracks['next']`` reset to ``None``
        when extra pages were fetched.

    Raises:
        requests.HTTPError: if any request answers with a 4xx/5xx status
            (unknown playlist, expired token, rate limit, ...).
        requests.RequestException: on connection problems or timeout.
    """
    get_playlist_headers = {
        'Authorization': 'Bearer ' + access_token,
        'Content-Type': 'application/json',
    }

    playlist_response = requests.get(
        'https://api.spotify.com/v1/playlists/' + playlist_id,
        headers=get_playlist_headers,
        timeout=30,
    )
    playlist_response.raise_for_status()
    playlist_data = playlist_response.json()

    # Follow the paging links so long playlists are not silently truncated
    # to their first page.
    tracks = playlist_data.get('tracks') or {}
    next_url = tracks.get('next')
    fetched_extra_pages = False
    while next_url:
        page_response = requests.get(
            next_url,
            headers=get_playlist_headers,
            timeout=30,
        )
        page_response.raise_for_status()
        page = page_response.json()
        tracks.setdefault('items', []).extend(page.get('items') or [])
        next_url = page.get('next')
        fetched_extra_pages = True

    if fetched_extra_pages:
        # Everything has been merged; there is no further page to request.
        tracks['next'] = None

    return playlist_data
54
+
55
if __name__ == "__main__":
    # Command-line entry point: the first argument is the playlist ID.
    # Exit with a usage message instead of a bare IndexError when it is
    # missing; additional arguments are ignored, as before.
    if len(sys.argv) < 2:
        sys.exit("usage: " + sys.argv[0] + " <playlist_id>")

    playlist_id = sys.argv[1]

    get_playlist_track_uris(playlist_id)
model/dict_tid2uri.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b52797435b4c60789b15afd28f846064645898376cfd3e4aabc36609770477cb
3
+ size 30017867
model/dict_uri2tid.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85fe3ebd1c087df637f92f561c48f8de71f3edee0dc357a42e60fb906f3c88cf
3
+ size 30017867
recommender.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import sklearn.preprocessing as pp
3
+ from scipy.sparse import csr_matrix
4
+ import numpy as np
5
+ import pandas as pd
6
+
7
+
8
def inference_row(list_tid, ps_matrix):
    """Score every row of ``ps_matrix`` against a set of track ids.

    A 1 x n_songs sparse row is built with a 1 in each column listed in
    ``list_tid``. Both that row and the rows of ``ps_matrix`` are passed
    through ``pp.normalize(..., axis=1)`` and then multiplied, producing one
    similarity score per row of ``ps_matrix``.

    Args:
        list_tid: column indices (track ids) of the query; each must be
            smaller than ``ps_matrix.shape[1]``.
        ps_matrix: 2-D matrix whose second dimension is the number of songs.

    Returns:
        tuple: ``(similarities, sparse_row)`` where ``similarities`` is the
        product of the normalised query row with the transposed normalised
        matrix, and ``sparse_row`` is the un-normalised query row.
    """
    normalized_matrix = pp.normalize(ps_matrix, axis=1)

    track_count = len(list_tid)
    song_count = ps_matrix.shape[1]

    # One entry of value 1 per requested track, all placed in row 0.
    values = np.ones(track_count)
    row_indices = np.zeros(track_count)
    sparse_row = csr_matrix(
        (values, (row_indices, list_tid)),
        shape=(1, song_count),
    )
    normalized_row = pp.normalize(sparse_row, axis=1)

    # For scipy sparse *matrix* types ``*`` is the matrix product.
    similarities = normalized_row * normalized_matrix.T
    return similarities, sparse_row
16
+
17
+
18
def get_best_tid(current_list, ps_matrix_row, K=50, MAX_tid=10):
    """Extend a track list with tracks from the most similar playlists.

    The rows of ``ps_matrix_row`` are ranked by their similarity to
    ``current_list`` (see ``inference_row``). Walking down that ranking, the
    ``tid`` entry of each candidate row in ``model/df_ps_train_new.hdf`` is
    appended to the running list (duplicates removed, first occurrence kept)
    until the list holds more than ``MAX_tid`` tracks, at which point it is
    cut to ``MAX_tid`` and returned.

    Args:
        current_list: list of seed track ids.
        ps_matrix_row: matrix handed to ``inference_row``; its row indices
            are used as labels into the HDF data frame.
        K: maximum number of candidate rows to consult.
        MAX_tid: maximum length of the returned list.

    Returns:
        list: the seed tracks followed by the added tracks, at most
        ``MAX_tid`` long once at least one candidate pushed the list over
        that size. If no candidate exists (``K <= 0`` or a matrix with fewer
        than two rows) ``current_list`` is returned unchanged.
    """
    df_ps_train = pd.read_hdf('model/df_ps_train_new.hdf')
    sim_matrix, _ = inference_row(current_list, ps_matrix_row)
    sim_vector = sim_matrix.toarray()[0].tolist()

    # Row indices ordered by descending score; ``sorted`` is stable, so ties
    # keep their original row order.
    ranked_pids = sorted(
        range(len(sim_vector)),
        key=sim_vector.__getitem__,
        reverse=True,
    )

    # NOTE(review): the best-ranked row is skipped, presumably because the
    # query playlist itself is expected to be the top match when it is part
    # of the matrix - confirm this is intended for unseen playlists.
    topK_pid = ranked_pids[1:K + 1]

    # Iterate over the candidates that actually exist. Indexing
    # ``topK_pid[n]`` up to K raised IndexError whenever fewer than K
    # candidates were available.
    for top_pid in topK_pid:
        # assumes the ``tid`` cell holds a sequence of track ids - TODO confirm
        add_tid_list = list(df_ps_train.loc[top_pid].tid)

        # Merge and de-duplicate while preserving first-seen order.
        current_list = list(dict.fromkeys(list(current_list) + add_tid_list))

        if len(current_list) > MAX_tid:
            current_list = current_list[:MAX_tid]
            break

    return current_list
58
+
59
+
60
def inference_from_tid(list_tid, K=50, MAX_tid=10):
    """Load the pickled matrix and return the best tracks for ``list_tid``.

    Args:
        list_tid: list of seed track ids.
        K: maximum number of candidate rows to consult
            (forwarded to ``get_best_tid``).
        MAX_tid: maximum length of the returned list
            (forwarded to ``get_best_tid``).

    Returns:
        list: the track ids returned by ``get_best_tid``.
    """
    pickle_path = 'model/giantMatrix_new.pickle'

    # NOTE: unpickling can execute arbitrary code; this file must only ever
    # be a trusted, locally produced model artefact.
    with open(pickle_path, 'rb') as f:
        ps_matrix = pickle.load(f)

    # Convert to CSR once and reuse it (previously the conversion was done
    # twice and the first result was discarded).
    ps_matrix_row = ps_matrix.tocsr()

    return get_best_tid(list_tid, ps_matrix_row, K, MAX_tid)
70
+
71
+
72
def inference_from_uri(list_uri, K=50, MAX_tid=10):
    """Recommend track URIs for a list of seed track URIs.

    URIs are translated to track ids with ``model/dict_uri2tid.pkl`` (URIs
    missing from that mapping are silently dropped), the ids are passed to
    ``inference_from_tid``, and the resulting ids are translated back with
    ``model/dict_tid2uri.pkl``.

    Args:
        list_uri: iterable of track URIs.
        K: forwarded to ``inference_from_tid``.
        MAX_tid: forwarded to ``inference_from_tid``.

    Returns:
        list: URIs of the tracks returned by ``inference_from_tid``.
    """
    with open('model/dict_uri2tid.pkl', 'rb') as uri2tid_file:
        uri_to_tid = pickle.load(uri2tid_file)

    # Keep only the URIs the model knows about.
    known_tids = []
    for uri in list_uri:
        if uri in uri_to_tid:
            known_tids.append(uri_to_tid[uri])

    recommended_tids = inference_from_tid(known_tids, K, MAX_tid)

    with open('model/dict_tid2uri.pkl', 'rb') as tid2uri_file:
        tid_to_uri = pickle.load(tid2uri_file)

    return [tid_to_uri[tid] for tid in recommended_tids]