gagan3012 committed on
Commit
7150045
1 Parent(s): d87a848

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -12
app.py CHANGED
@@ -2,8 +2,6 @@ import streamlit as st
2
  import pandas as pd
3
  from sklearn.feature_extraction.text import TfidfVectorizer
4
  from sklearn.neighbors import NearestNeighbors
5
- from sklearn.decomposition import TruncatedSVD
6
- import numpy as np
7
 
8
  # Page config
9
  st.set_page_config(
@@ -108,13 +106,13 @@ def run_imps(df):
108
 
109
  # Collaborative Filtering
110
  user_song_matrix = df.pivot_table(index='user', columns='song', values='play_count', fill_value=0)
111
- svd = TruncatedSVD(n_components=20)
112
- user_factors = svd.fit_transform(user_song_matrix)
113
- song_factors = svd.components_.T
114
 
115
- return df, tfidf, tfidf_matrix, nn, user_song_matrix, user_factors, song_factors
116
 
117
- df, tfidf, tfidf_matrix, nn, user_song_matrix, user_factors, song_factors = run_imps(df)
 
118
 
119
  # Content-based recommendation function
120
  def content_based_recommend(song_title, top_n=5):
@@ -126,17 +124,28 @@ def content_based_recommend(song_title, top_n=5):
126
  except IndexError:
127
  return pd.DataFrame(columns=['title', 'artist_name', 'release'])
128
 
 
129
  def collaborative_recommend(user_id, top_n=5):
130
  if user_id not in user_song_matrix.index:
131
  return pd.DataFrame(columns=['title', 'artist_name', 'release'])
132
 
133
- user_vector = user_factors[user_song_matrix.index.get_loc(user_id)]
134
- scores = np.dot(song_factors, user_vector)
135
-
 
 
 
136
  listened_songs = user_song_matrix.loc[user_id][user_song_matrix.loc[user_id] > 0].index
137
- scores = {song: score for song, score in zip(user_song_matrix.columns, scores) if song not in listened_songs}
138
 
139
- recommended_songs = sorted(scores.items(), key=lambda x: x[1], reverse=True)[:top_n]
 
 
 
 
 
 
 
 
140
  recommended_song_ids = [song for song, _ in recommended_songs]
141
  return df[df['song'].isin(recommended_song_ids)][['title', 'artist_name', 'release']].drop_duplicates()
142
 
 
2
  import pandas as pd
3
  from sklearn.feature_extraction.text import TfidfVectorizer
4
  from sklearn.neighbors import NearestNeighbors
 
 
5
 
6
  # Page config
7
  st.set_page_config(
 
106
 
107
  # Collaborative Filtering
108
  user_song_matrix = df.pivot_table(index='user', columns='song', values='play_count', fill_value=0)
109
+ knn_cf = NearestNeighbors(n_neighbors=10, metric='cosine', algorithm='auto')
110
+ knn_cf.fit(user_song_matrix)
 
111
 
112
+ return df, tfidf, tfidf_matrix, nn, user_song_matrix, knn_cf
113
 
114
+ df = load_data()
115
+ df, tfidf, tfidf_matrix, nn, user_song_matrix, knn_cf = run_imps(df)
116
 
117
  # Content-based recommendation function
118
  def content_based_recommend(song_title, top_n=5):
 
124
  except IndexError:
125
  return pd.DataFrame(columns=['title', 'artist_name', 'release'])
126
 
127
+ # Collaborative recommendation function using KNN
128
  def collaborative_recommend(user_id, top_n=5):
129
  if user_id not in user_song_matrix.index:
130
  return pd.DataFrame(columns=['title', 'artist_name', 'release'])
131
 
132
+ # Get the nearest neighbors for the user
133
+ user_index = user_song_matrix.index.get_loc(user_id)
134
+ distances, indices = knn_cf.kneighbors(user_song_matrix.iloc[user_index].values.reshape(1, -1), n_neighbors=top_n + 1)
135
+
136
+ # Collect recommendations from neighbors
137
+ neighbors = indices.flatten()[1:]
138
  listened_songs = user_song_matrix.loc[user_id][user_song_matrix.loc[user_id] > 0].index
 
139
 
140
+ recommendations = {}
141
+ for neighbor in neighbors:
142
+ neighbor_songs = user_song_matrix.iloc[neighbor]
143
+ for song, play_count in neighbor_songs.items():
144
+ if song not in listened_songs and play_count > 0:
145
+ recommendations[song] = recommendations.get(song, 0) + play_count
146
+
147
+ # Sort songs by aggregated scores
148
+ recommended_songs = sorted(recommendations.items(), key=lambda x: x[1], reverse=True)[:top_n]
149
  recommended_song_ids = [song for song, _ in recommended_songs]
150
  return df[df['song'].isin(recommended_song_ids)][['title', 'artist_name', 'release']].drop_duplicates()
151