Amit Kumar
committed on
Commit
•
f636680
1
Parent(s):
02a1703
initial commit
Browse files- .env +2 -0
- .gitattributes +1 -0
- app.py +89 -0
- kmeans_clustered_spotify_dataset.csv +3 -0
- recommendations.py +112 -0
- requirements.txt +7 -0
.env
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
SPOTIFY_CLIENT_ID=111597a76d6a42f3add7008145f16284
|
2 |
+
SPOTIFY_CLIENT_SECRET=dbb5781e8e8a4d39a012a2ad27fb2e27
|
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
kmeans_clustered_spotify_dataset.csv filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import spotipy
|
3 |
+
import gradio as gr
|
4 |
+
import pandas as pd
|
5 |
+
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
from recommendations import recommend_songs
|
8 |
+
from spotipy.oauth2 import SpotifyClientCredentials
|
9 |
+
|
10 |
+
# Populate the process environment from the .env file
load_dotenv()

# Spotify API credentials, read from the environment
client_id = os.getenv('SPOTIFY_CLIENT_ID')
client_secret = os.getenv('SPOTIFY_CLIENT_SECRET')

# Spotify client authenticated with the credentials above
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
)

# Dataset passed to recommend_songs as ``spotify_data``
data = pd.read_csv("kmeans_clustered_spotify_dataset.csv")
|
21 |
+
|
22 |
+
def fetch_song_cover(song_name):
    """Look up a track on Spotify and return its cover art and basic metadata.

    Args:
        song_name: Free-text query passed to the Spotify track search.

    Returns:
        A ``(cover_url, track_name, artist_name)`` tuple for the first search
        hit. ``cover_url`` is ``None`` when the hit's album lists no images.
        When the search has no hits, returns
        ``(None, "Song not found", "Artist not found")``.
    """
    results = sp.search(q=song_name, limit=1, type='track')
    items = results['tracks']['items']
    if not items:
        return None, "Song not found", "Artist not found"

    song = items[0]
    # Guard against an empty image list instead of raising IndexError.
    images = song['album']['images']
    cover_url = images[0]['url'] if images else None

    return cover_url, song['name'], song['artists'][0]['name']
|
35 |
+
|
36 |
+
def get_recommendations(song_name):
    """Return cover-image URLs for songs recommended from ``song_name``.

    Args:
        song_name: Name of the seed song.

    Returns:
        A list of cover URLs, one per recommended song for which
        ``fetch_song_cover`` found a cover. Recommendations without a cover
        are skipped.
    """
    suggestions = recommend_songs(song_list=[{'name': song_name}], spotify_data=data)

    song_covers = []
    for suggestion in suggestions:
        cover_url = fetch_song_cover(suggestion["name"])[0]
        # fetch_song_cover yields None when nothing matched; None is not a
        # displayable gallery item, so leave it out.
        if cover_url is not None:
            song_covers.append(cover_url)

    return song_covers
|
46 |
+
|
47 |
+
# Gradio Interface
|
48 |
+
def gradio_interface(song_name):
    """Search for a song and produce the values for the five UI outputs.

    Args:
        song_name: Text entered by the user.

    Returns:
        A 5-tuple matching the outputs wired to the search button: cover
        image URL (or ``None``), song-name text, artist-name text, and two
        visibility updates for the recommendation containers. The containers
        are shown when a cover was found and hidden otherwise.
    """
    cover_url, song_name, artist_name = fetch_song_cover(song_name)

    if cover_url:
        return (
            cover_url,
            f"Song: {song_name}",
            f"Artist: {artist_name}",
            gr.update(visible=True),
            gr.update(visible=True),
        )

    # The click handler is wired to five outputs, so the not-found branch must
    # also return five values; keep the recommendation containers hidden.
    return (
        None,
        "Song not found",
        "Artist not found",
        gr.update(visible=False),
        gr.update(visible=False),
    )
|
55 |
+
|
56 |
+
# Build the Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# Music Recommendation System using Spotify Dataset")

    with gr.Row():
        # First column: search controls
        with gr.Column():
            song_input = gr.Textbox(label="Enter Song Name")
            search_button = gr.Button("Find Song")

        # Second column: details of the matched song
        with gr.Column():
            cover_output = gr.Image(label="Cover Image")
            song_name_output = gr.Textbox(label="Song Name")
            artist_name_output = gr.Textbox(label="Artist Name")

    # Recommendation containers start hidden; gradio_interface returns the
    # visibility updates applied to them.
    recommendations_labels = gr.Row(visible=False)
    recommendations_songs = gr.Column(visible=False)

    with recommendations_labels:
        gr.Markdown("# You may also like")

    with recommendations_songs:
        song_covers = gr.Gallery(label="Image Gallery")

    # Step 1: show the matched song. Step 2: fill the gallery.
    search_event = search_button.click(
        fn=gradio_interface,
        inputs=song_input,
        outputs=[
            cover_output,
            song_name_output,
            artist_name_output,
            recommendations_labels,
            recommendations_songs,
        ],
    )
    search_event.then(
        fn=get_recommendations,
        inputs=song_input,
        outputs=song_covers,
    )

# Start the Gradio app
demo.launch(debug=True)
|
kmeans_clustered_spotify_dataset.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab1f64fbf645f615ad389912e5fe63a23610a5f31c589f24aff0eafbc257a044
|
3 |
+
size 31824490
|
recommendations.py
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import os
|
3 |
+
import spotipy
|
4 |
+
import difflib
|
5 |
+
import pandas as pd
|
6 |
+
import numpy as np
|
7 |
+
from sklearn.preprocessing import StandardScaler
|
8 |
+
|
9 |
+
from dotenv import load_dotenv
|
10 |
+
from spotipy.oauth2 import SpotifyClientCredentials
|
11 |
+
from collections import defaultdict
|
12 |
+
from sklearn.metrics import euclidean_distances
|
13 |
+
from scipy.spatial.distance import cdist
|
14 |
+
|
15 |
+
# Numeric feature columns used when comparing songs
number_cols = [
    'valence',
    'year',
    'acousticness',
    'danceability',
    'duration_ms',
    'energy',
    'explicit',
    'instrumentalness',
    'key',
    'liveness',
    'loudness',
    'mode',
    'popularity',
    'speechiness',
    'tempo',
]

# Populate the process environment from the .env file
load_dotenv()

# Spotify API credentials, read from the environment
client_id = os.getenv('SPOTIFY_CLIENT_ID')
client_secret = os.getenv('SPOTIFY_CLIENT_SECRET')

# Spotify client authenticated with the credentials above
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
)
|
27 |
+
|
28 |
+
def find_song(name):
    """Fetch a song's metadata and audio features from the Spotify API.

    Args:
        name: Free-text query passed to the Spotify track search.

    Returns:
        A ``pandas.DataFrame`` combining track metadata (name, year, artist,
        explicit, duration_ms, popularity) with the track's audio features,
        or ``None`` when the search has no hits or no audio features are
        returned for the track.
    """
    results = sp.search(q=name, limit=1, type='track')
    items = results["tracks"]["items"]
    if not items:
        return None

    track = items[0]

    # The per-track entry may be None when no features are available;
    # treat that like "not found" instead of crashing on ``None.items()``.
    audio_features = sp.audio_features(track["id"])
    if not audio_features or audio_features[0] is None:
        return None

    song_data = {
        "name": [name],
        "year": [int(track['album']['release_date'].split("-")[0])],
        "artist": [track['artists'][0]['name']],
        "explicit": [int(track['explicit'])],
        "duration_ms": [track['duration_ms']],
        "popularity": [track['popularity']],
    }
    # Audio-feature values overwrite any same-named keys set above.
    song_data.update(audio_features[0])

    return pd.DataFrame(song_data)
|
49 |
+
|
50 |
+
|
51 |
+
def get_song_data(song, spotify_data):
    """Return data for ``song``, preferring the local dataset over the API.

    Args:
        song: Mapping with a ``'name'`` key.
        spotify_data: DataFrame with a ``'name'`` column.

    Returns:
        The first row of ``spotify_data`` whose ``'name'`` equals
        ``song['name']`` exactly (a ``pandas.Series``). When no row matches,
        returns the result of ``find_song(song['name'])``.
    """
    matches = spotify_data[spotify_data['name'] == song['name']]
    if not matches.empty:
        print("Finding the song in the fitted data.")
        return matches.iloc[0]

    print("Could not find song in the fitted data. Trying to fetch online now...")
    return find_song(song['name'])
|
61 |
+
|
62 |
+
|
63 |
+
def get_mean_vector(song_list, spotify_data):
    """Average the numeric feature vectors of the songs in ``song_list``.

    Songs that ``get_song_data`` cannot resolve are skipped with a warning
    printed to stdout.

    Args:
        song_list: Iterable of mappings, each with a ``'name'`` key.
        spotify_data: DataFrame passed through to ``get_song_data``.

    Returns:
        A 1-D float array with one mean value per entry of ``number_cols``.
        If no song could be resolved, the mean of an empty array is taken
        and the result is NaN.
    """
    song_vectors = []

    for song in song_list:
        song_data = get_song_data(song, spotify_data)
        if song_data is None:
            print('Warning: {} does not exist in Spotify or in database'.format(song['name']))
            continue
        # get_song_data yields a Series for dataset hits and a DataFrame for
        # songs fetched online; flatten both to a 1-D float vector so they
        # can be stacked into one matrix.
        song_vector = np.asarray(song_data[number_cols].values, dtype=float).ravel()
        song_vectors.append(song_vector)

    song_matrix = np.array(song_vectors)
    return np.mean(song_matrix, axis=0)
|
77 |
+
|
78 |
+
|
79 |
+
def flatten_dict_list(dict_list):
    """Merge a list of dicts into one dict mapping each key to a list of values.

    Args:
        dict_list: List of dictionaries. May be empty, and the dictionaries
            need not share the same keys.

    Returns:
        A ``defaultdict(list)`` where each key maps to the values found under
        that key, in input order. Looking up a key that never occurred
        yields an empty list.
    """
    # A list factory lets keys that first appear in a later dict be collected
    # too, and makes the empty-input case return an empty mapping.
    flattened_dict = defaultdict(list)

    for dictionary in dict_list:
        for key, value in dictionary.items():
            flattened_dict[key].append(value)

    return flattened_dict
|
90 |
+
|
91 |
+
|
92 |
+
def recommend_songs(song_list, spotify_data, n_songs=10):
    """Recommend dataset songs whose numeric features are closest to a seed song.

    The first entry of ``song_list`` is the seed. Its feature vector and the
    dataset's ``number_cols`` columns are standardized, then ranked by cosine
    distance. Songs whose lower-cased name matches any name in ``song_list``
    are excluded from the result.

    Args:
        song_list: List of dicts, each with a ``'name'`` key.
        spotify_data: DataFrame containing the ``number_cols`` columns plus
            ``'name'``, ``'year'`` and ``'artists'``.
        n_songs: Maximum number of recommendations to return.

    Returns:
        A list of up to ``n_songs`` dicts with keys ``'name'`` (lower-cased),
        ``'year'`` and ``'artists'``, closest first. Empty when ``song_list``
        is empty or the seed song cannot be resolved.
    """
    metadata_cols = ['name', 'year', 'artists']

    if not song_list:
        return []

    seed = get_song_data(song_list[0], spotify_data)
    if seed is None:
        # Neither the dataset nor the online lookup produced the seed song.
        return []
    song_center = np.asarray(seed[number_cols].values, dtype=float).reshape(1, -1)

    # The scaler must be fitted before transform() can be used; fit it on the
    # dataset and reuse the same scaling for the seed vector.
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(spotify_data[number_cols].values)
    scaled_song_center = scaler.transform(song_center)

    distances = cdist(scaled_song_center, scaled_data, 'cosine')[0]
    ranked = spotify_data.iloc[np.argsort(distances)][metadata_cols].copy()
    ranked['name'] = ranked['name'].str.lower()

    # Exclude the input songs before truncating, so that removing them does
    # not shrink the result below n_songs.
    seed_names = {song['name'].lower() for song in song_list}
    rec_songs = ranked[~ranked['name'].isin(seed_names)].head(n_songs)

    return rec_songs.to_dict(orient='records')
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
pandas
|
3 |
+
numpy
|
4 |
+
scikit-learn
|
5 |
+
scipy
|
6 |
+
spotipy
|
7 |
+
python-dotenv
|