Amit Kumar
committed on
Commit
•
e7b83d9
1
Parent(s):
f636680
fit data using kmeans
Browse files
- .gitattributes +1 -0
- app.py +3 -1
- kmeans_clustered_spotify_dataset.csv → data.csv +2 -2
- get_scaler.py +14 -0
- recommendations.py +2 -2
.gitattributes
CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
kmeans_clustered_spotify_dataset.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
kmeans_clustered_spotify_dataset.csv filter=lfs diff=lfs merge=lfs -text
|
37 |
+
data.csv filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -3,6 +3,7 @@ import spotipy
|
|
3 |
import gradio as gr
|
4 |
import pandas as pd
|
5 |
|
|
|
6 |
from dotenv import load_dotenv
|
7 |
from recommendations import recommend_songs
|
8 |
from spotipy.oauth2 import SpotifyClientCredentials
|
@@ -17,7 +18,8 @@ client_secret = os.getenv('SPOTIFY_CLIENT_SECRET')
|
|
17 |
# Authenticate with the Spotify API
|
18 |
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id, client_secret=client_secret))
|
19 |
|
20 |
-
data = pd.read_csv("kmeans_clustered_spotify_dataset.csv")
|
|
|
21 |
|
22 |
def fetch_song_cover(song_name):
|
23 |
# Search for the song
|
|
|
3 |
import gradio as gr
|
4 |
import pandas as pd
|
5 |
|
6 |
+
from get_scaler import get_scaler
|
7 |
from dotenv import load_dotenv
|
8 |
from recommendations import recommend_songs
|
9 |
from spotipy.oauth2 import SpotifyClientCredentials
|
|
|
18 |
# Authenticate with the Spotify API
|
19 |
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id, client_secret=client_secret))
|
20 |
|
21 |
+
# data = pd.read_csv("kmeans_clustered_spotify_dataset.csv")
|
22 |
+
data = get_scaler()[0]
|
23 |
|
24 |
def fetch_song_cover(song_name):
|
25 |
# Search for the song
|
kmeans_clustered_spotify_dataset.csv → data.csv
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c97c42349c2e97339ff908ab01163ec5a3264f02ec65e78cb961c85077ded7f8
|
3 |
+
size 29654587
|
get_scaler.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
def get_scaler():
|
3 |
+
|
4 |
+
data = pd.read_csv("data.csv")
|
5 |
+
song_cluster_pipeline = Pipeline([("scaler", StandardScaler()),
|
6 |
+
("kmeans", KMeans(n_clusters=20,
|
7 |
+
verbose=False))],
|
8 |
+
verbose=False)
|
9 |
+
|
10 |
+
X = data.select_dtypes(np.number)
|
11 |
+
number_cols = list(X.columns)
|
12 |
+
|
13 |
+
data["cluster_label"] = song_cluster_pipeline.fit_predict(X)
|
14 |
+
return data, song_cluster_pipeline
|
recommendations.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
|
2 |
import os
|
3 |
import spotipy
|
4 |
-
import difflib
|
5 |
import pandas as pd
|
6 |
import numpy as np
|
7 |
from sklearn.preprocessing import StandardScaler
|
8 |
|
|
|
9 |
from dotenv import load_dotenv
|
10 |
from spotipy.oauth2 import SpotifyClientCredentials
|
11 |
from collections import defaultdict
|
@@ -98,7 +98,7 @@ def recommend_songs( song_list, spotify_data, n_songs=10):
|
|
98 |
|
99 |
song_center = get_song_data(song_list[0], spotify_data)[number_cols].values
|
100 |
|
101 |
-
scaler =
|
102 |
scaled_data = scaler.transform(spotify_data[number_cols])
|
103 |
scaled_song_center = scaler.transform(song_center.reshape(1, -1))
|
104 |
distances = cdist(scaled_song_center, scaled_data, 'cosine')
|
|
|
1 |
|
2 |
import os
|
3 |
import spotipy
|
|
|
4 |
import pandas as pd
|
5 |
import numpy as np
|
6 |
from sklearn.preprocessing import StandardScaler
|
7 |
|
8 |
+
from get_scaler import get_scaler
|
9 |
from dotenv import load_dotenv
|
10 |
from spotipy.oauth2 import SpotifyClientCredentials
|
11 |
from collections import defaultdict
|
|
|
98 |
|
99 |
song_center = get_song_data(song_list[0], spotify_data)[number_cols].values
|
100 |
|
101 |
+
scaler = get_scaler()[1]
|
102 |
scaled_data = scaler.transform(spotify_data[number_cols])
|
103 |
scaled_song_center = scaler.transform(song_center.reshape(1, -1))
|
104 |
distances = cdist(scaled_song_center, scaled_data, 'cosine')
|