Amit Kumar committed on
Commit
f636680
1 Parent(s): 02a1703

initial commit

Browse files
.env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ SPOTIFY_CLIENT_ID=111597a76d6a42f3add7008145f16284
2
+ SPOTIFY_CLIENT_SECRET=dbb5781e8e8a4d39a012a2ad27fb2e27
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ kmeans_clustered_spotify_dataset.csv filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import spotipy
3
+ import gradio as gr
4
+ import pandas as pd
5
+
6
+ from dotenv import load_dotenv
7
+ from recommendations import recommend_songs
8
+ from spotipy.oauth2 import SpotifyClientCredentials
9
+
10
# Read the Spotify credentials from a .env file into the process environment.
load_dotenv()

client_id = os.environ.get('SPOTIFY_CLIENT_ID')
client_secret = os.environ.get('SPOTIFY_CLIENT_SECRET')

# Shared Spotify client, authenticated through SpotifyClientCredentials.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
)

# Song table handed to recommend_songs() as ``spotify_data``.
data = pd.read_csv("kmeans_clustered_spotify_dataset.csv")
21
+
22
def fetch_song_cover(song_name):
    """Search Spotify for a track and return its cover art and basic metadata.

    Args:
        song_name: Free-text query passed to the Spotify track search.

    Returns:
        A ``(cover_url, track_name, artist_name)`` tuple for the top search
        hit. ``cover_url`` is ``None`` when the hit's album lists no images.
        When the search returns no tracks, the tuple is
        ``(None, "Song not found", "Artist not found")``.
    """
    results = sp.search(q=song_name, limit=1, type='track')
    items = results['tracks']['items']
    if not items:
        return None, "Song not found", "Artist not found"

    song = items[0]
    # Guard against an empty image list instead of indexing [0] blindly.
    images = song['album']['images']
    cover_url = images[0]['url'] if images else None
    return cover_url, song['name'], song['artists'][0]['name']
35
+
36
def get_recommendations(song_name):
    """Return cover-art URLs for the songs recommended for ``song_name``.

    Args:
        song_name: Name of the seed song, as typed by the user.

    Returns:
        A list of cover image URLs, one per recommended song for which
        ``fetch_song_cover`` returned a cover.
    """
    suggestions = recommend_songs(song_list=[{'name': song_name}], spotify_data=data)
    covers = (fetch_song_cover(suggestion["name"])[0] for suggestion in suggestions)
    # fetch_song_cover reports a missing cover as None; drop those entries so
    # the gallery only receives displayable URLs.
    return [cover for cover in covers if cover]
46
+
47
+ # Gradio Interface
48
def gradio_interface(song_name):
    """Resolve the searched song and build the values for the search outputs.

    The click handler is wired to five outputs (cover image, song name,
    artist name, and the two recommendation containers), so both branches
    return exactly five values.

    Args:
        song_name: Text entered in the search box.

    Returns:
        A 5-tuple ``(cover_url, song_text, artist_text, labels_update,
        songs_update)``. The two updates reveal the recommendation section
        when a cover was found and hide it otherwise.
    """
    cover_url, track_name, artist_name = fetch_song_cover(song_name)

    if cover_url:
        return (
            cover_url,
            f"Song: {track_name}",
            f"Artist: {artist_name}",
            gr.update(visible=True),
            gr.update(visible=True),
        )

    # Returning only three values here would not match the five wired outputs.
    return (
        None,
        "Song not found",
        "Artist not found",
        gr.update(visible=False),
        gr.update(visible=False),
    )
55
+
56
+ # Creating Gradio Interface
57
with gr.Blocks() as demo:
    gr.Markdown("# Music Recommendation System using Spotify Dataset")

    with gr.Row():
        # Left column: search controls.
        with gr.Column():
            song_input = gr.Textbox(label="Enter Song Name")
            search_button = gr.Button("Find Song")

        # Right column: details of the song that was found.
        with gr.Column():
            cover_output = gr.Image(label="Cover Image")
            song_name_output = gr.Textbox(label="Song Name")
            artist_name_output = gr.Textbox(label="Artist Name")

    # Both recommendation containers start hidden; gradio_interface returns
    # the visibility updates for them.
    recommendations_labels = gr.Row(visible=False)
    recommendations_songs = gr.Column(visible=False)

    with recommendations_labels:
        gr.Markdown("# You may also like")

    with recommendations_songs:
        song_covers = gr.Gallery(label="Image Gallery")

    # First show the searched song, then fill the gallery with recommendations.
    search_button.click(
        fn=gradio_interface,
        inputs=song_input,
        outputs=[
            cover_output,
            song_name_output,
            artist_name_output,
            recommendations_labels,
            recommendations_songs,
        ],
    ).then(
        fn=get_recommendations,
        inputs=song_input,
        outputs=song_covers,
    )

# Launch only when executed as a script, so importing this module does not
# start a server.
if __name__ == "__main__":
    demo.launch(debug=True)
kmeans_clustered_spotify_dataset.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab1f64fbf645f615ad389912e5fe63a23610a5f31c589f24aff0eafbc257a044
3
+ size 31824490
recommendations.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import spotipy
4
+ import difflib
5
+ import pandas as pd
6
+ import numpy as np
7
+ from sklearn.preprocessing import StandardScaler
8
+
9
+ from dotenv import load_dotenv
10
+ from spotipy.oauth2 import SpotifyClientCredentials
11
+ from collections import defaultdict
12
+ from sklearn.metrics import euclidean_distances
13
+ from scipy.spatial.distance import cdist
14
+
15
# Numeric feature columns used to build song vectors for similarity search.
number_cols = [
    'valence',
    'year',
    'acousticness',
    'danceability',
    'duration_ms',
    'energy',
    'explicit',
    'instrumentalness',
    'key',
    'liveness',
    'loudness',
    'mode',
    'popularity',
    'speechiness',
    'tempo',
]
17
+
18
# Read the Spotify credentials from a .env file into the process environment.
load_dotenv()

client_id = os.environ.get('SPOTIFY_CLIENT_ID')
client_secret = os.environ.get('SPOTIFY_CLIENT_SECRET')

# Spotify client used by find_song() for songs missing from the dataset.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
)
27
+
28
def find_song(name):
    """Fetch a song's metadata and audio features from Spotify.

    Args:
        name: Free-text query passed to the Spotify track search.

    Returns:
        A one-row ``pandas.DataFrame`` combining track metadata (``name``,
        ``year``, ``artist``, ``explicit``, ``duration_ms``, ``popularity``)
        with every field of the track's audio features, or ``None`` when the
        search has no hit or no audio features are available. The ``name``
        column holds the query text as given, not the title Spotify returned.
    """
    results = sp.search(q=name, limit=1, type='track')
    items = results["tracks"]["items"]
    if not items:
        return None

    track = items[0]
    # The features call returns a list; its entry can be None when Spotify has
    # no audio features for the track, which would crash on .items().
    features = sp.audio_features(track["id"])
    audio_features = features[0] if features else None
    if audio_features is None:
        return None

    song_data = {
        "name": [name],
        "year": [int(track['album']['release_date'].split("-")[0])],
        "artist": [track['artists'][0]['name']],
        "explicit": [int(track['explicit'])],
        "duration_ms": [track['duration_ms']],
        "popularity": [track['popularity']],
    }
    # Audio-feature fields are scalars and override any same-named key above.
    song_data.update(audio_features)

    return pd.DataFrame(song_data)
49
+
50
+
51
def get_song_data(song, spotify_data):
    """Return the data for ``song``, preferring the local dataset.

    Args:
        song: Mapping with a ``'name'`` key holding the song title.
        spotify_data: DataFrame with a ``'name'`` column to search.

    Returns:
        The first row of ``spotify_data`` whose name equals ``song['name']``;
        if no row matches, whatever ``find_song`` returns for that name.
    """
    wanted = song['name']
    matches = spotify_data[spotify_data['name'] == wanted]

    if len(matches) == 0:
        print("Could not find song in the fitted data. Trying to fetch online now...")
        return find_song(wanted)

    first_match = matches.iloc[0]
    print("Finding the song in the fitted data.")
    return first_match
61
+
62
+
63
def get_mean_vector(song_list, spotify_data):
    """Average the numeric feature vectors of the given songs.

    Args:
        song_list: Iterable of mappings, each with a ``'name'`` key.
        spotify_data: DataFrame searched first for each song.

    Returns:
        A 1-D array holding the element-wise mean of the ``number_cols``
        features over every song that could be resolved. Songs that cannot be
        resolved are skipped with a warning; if none resolve, the result is
        the mean of an empty array.
    """
    song_vectors = []

    for song in song_list:
        song_data = get_song_data(song, spotify_data)
        if song_data is None:
            print('Warning: {} does not exist in Spotify or in database'.format(song['name']))
            continue
        # A dataset hit is a Series (1-D values) while an online hit is a
        # one-row DataFrame (2-D values); flatten so all vectors stack evenly.
        song_vectors.append(np.ravel(song_data[number_cols].values))

    song_matrix = np.array(song_vectors)
    return np.mean(song_matrix, axis=0)
77
+
78
+
79
def flatten_dict_list(dict_list):
    """Merge a list of dicts into one dict of lists.

    Args:
        dict_list: Iterable of mappings. It may be empty, and the mappings do
            not need to share the same keys.

    Returns:
        A ``defaultdict`` mapping each key seen to the list of its values, in
        input order. Looking up a key that was never seen raises ``KeyError``,
        as with a plain dict.
    """
    flattened_dict = defaultdict(list)
    for dictionary in dict_list:
        for key, value in dictionary.items():
            flattened_dict[key].append(value)

    # Drop the factory so later lookups of unknown keys raise instead of
    # silently inserting empty lists.
    flattened_dict.default_factory = None
    return flattened_dict
90
+
91
+
92
def recommend_songs(song_list, spotify_data, n_songs=10):
    """Recommend songs from ``spotify_data`` that are similar to a seed song.

    Only the first entry of ``song_list`` is used as the query vector; the
    names of all entries are excluded from the result.

    Args:
        song_list: List of mappings, each with a ``'name'`` key.
        spotify_data: DataFrame containing the ``number_cols`` feature columns
            plus ``'name'``, ``'year'`` and ``'artists'``.
        n_songs: Number of nearest rows to consider before the seed songs
            themselves are filtered out.

    Returns:
        A list of ``{'name', 'year', 'artists'}`` records with lower-cased
        names, ordered from most to least similar. The list is empty when
        ``song_list`` is empty or the seed song cannot be resolved.
    """
    metadata_cols = ['name', 'year', 'artists']
    if not song_list:
        return []

    song_dict = flatten_dict_list(song_list)

    seed = get_song_data(song_list[0], spotify_data)
    if seed is None:
        # Neither the dataset nor the online lookup produced the seed song.
        return []
    song_center = np.asarray(seed[number_cols].values, dtype=float).reshape(1, -1)

    # The scaler has to be fitted before it can transform; calling transform
    # on a fresh StandardScaler raises NotFittedError.
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(spotify_data[number_cols].to_numpy(dtype=float))
    scaled_song_center = scaler.transform(song_center)

    distances = cdist(scaled_song_center, scaled_data, 'cosine')
    index = np.argsort(distances[0])[:n_songs]

    # Copy so the name normalisation below does not write into a slice of the
    # caller's DataFrame.
    rec_songs = spotify_data.iloc[index].copy()
    rec_songs['name'] = rec_songs['name'].str.lower()
    seed_names = [x.lower() for x in song_dict['name']]

    rec_songs = rec_songs[~rec_songs['name'].isin(seed_names)]
    return rec_songs[metadata_cols].to_dict(orient='records')
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ pandas
3
+ numpy
4
+ scikit-learn
5
+ scipy
6
+ spotipy
7
+ python-dotenv