Amit Kumar committed on
Commit
e7b83d9
•
1 Parent(s): f636680

fit data using kmeans

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  kmeans_clustered_spotify_dataset.csv filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  kmeans_clustered_spotify_dataset.csv filter=lfs diff=lfs merge=lfs -text
37
+ data.csv filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -3,6 +3,7 @@ import spotipy
3
  import gradio as gr
4
  import pandas as pd
5
 
 
6
  from dotenv import load_dotenv
7
  from recommendations import recommend_songs
8
  from spotipy.oauth2 import SpotifyClientCredentials
@@ -17,7 +18,8 @@ client_secret = os.getenv('SPOTIFY_CLIENT_SECRET')
17
  # Authenticate with the Spotify API
18
  sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id, client_secret=client_secret))
19
 
20
- data = pd.read_csv("kmeans_clustered_spotify_dataset.csv")
 
21
 
22
  def fetch_song_cover(song_name):
23
  # Search for the song
 
3
  import gradio as gr
4
  import pandas as pd
5
 
6
+ from get_scaler import get_scaler
7
  from dotenv import load_dotenv
8
  from recommendations import recommend_songs
9
  from spotipy.oauth2 import SpotifyClientCredentials
 
18
  # Authenticate with the Spotify API
19
  sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id, client_secret=client_secret))
20
 
21
+ # data = pd.read_csv("kmeans_clustered_spotify_dataset.csv")
22
+ data = get_scaler()[0]
23
 
24
  def fetch_song_cover(song_name):
25
  # Search for the song
kmeans_clustered_spotify_dataset.csv → data.csv RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab1f64fbf645f615ad389912e5fe63a23610a5f31c589f24aff0eafbc257a044
3
- size 31824490
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c97c42349c2e97339ff908ab01163ec5a3264f02ec65e78cb961c85077ded7f8
3
+ size 29654587
get_scaler.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ def get_scaler():
3
+
4
+ data = pd.read_csv("data.csv")
5
+ song_cluster_pipeline = Pipeline([("scaler", StandardScaler()),
6
+ ("kmeans", KMeans(n_clusters=20,
7
+ verbose=False))],
8
+ verbose=False)
9
+
10
+ X = data.select_dtypes(np.number)
11
+ number_cols = list(X.columns)
12
+
13
+ data["cluster_label"] = song_cluster_pipeline.fit_predict(X)
14
+ return data, song_cluster_pipeline
recommendations.py CHANGED
@@ -1,11 +1,11 @@
1
 
2
  import os
3
  import spotipy
4
- import difflib
5
  import pandas as pd
6
  import numpy as np
7
  from sklearn.preprocessing import StandardScaler
8
 
 
9
  from dotenv import load_dotenv
10
  from spotipy.oauth2 import SpotifyClientCredentials
11
  from collections import defaultdict
@@ -98,7 +98,7 @@ def recommend_songs( song_list, spotify_data, n_songs=10):
98
 
99
  song_center = get_song_data(song_list[0], spotify_data)[number_cols].values
100
 
101
- scaler = StandardScaler()
102
  scaled_data = scaler.transform(spotify_data[number_cols])
103
  scaled_song_center = scaler.transform(song_center.reshape(1, -1))
104
  distances = cdist(scaled_song_center, scaled_data, 'cosine')
 
1
 
2
  import os
3
  import spotipy
 
4
  import pandas as pd
5
  import numpy as np
6
  from sklearn.preprocessing import StandardScaler
7
 
8
+ from get_scaler import get_scaler
9
  from dotenv import load_dotenv
10
  from spotipy.oauth2 import SpotifyClientCredentials
11
  from collections import defaultdict
 
98
 
99
  song_center = get_song_data(song_list[0], spotify_data)[number_cols].values
100
 
101
+ scaler = get_scaler()[1]
102
  scaled_data = scaler.transform(spotify_data[number_cols])
103
  scaled_song_center = scaler.transform(song_center.reshape(1, -1))
104
  distances = cdist(scaled_song_center, scaled_data, 'cosine')