Update app.py
Browse files
app.py
CHANGED
@@ -2,8 +2,6 @@ import streamlit as st
|
|
2 |
import pandas as pd
|
3 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
4 |
from sklearn.neighbors import NearestNeighbors
|
5 |
-
from sklearn.decomposition import TruncatedSVD
|
6 |
-
import numpy as np
|
7 |
|
8 |
# Page config
|
9 |
st.set_page_config(
|
@@ -108,13 +106,13 @@ def run_imps(df):
|
|
108 |
|
109 |
# Collaborative Filtering
|
110 |
user_song_matrix = df.pivot_table(index='user', columns='song', values='play_count', fill_value=0)
|
111 |
-
|
112 |
-
|
113 |
-
song_factors = svd.components_.T
|
114 |
|
115 |
-
return df, tfidf, tfidf_matrix, nn, user_song_matrix,
|
116 |
|
117 |
-
df
|
|
|
118 |
|
119 |
# Content-based recommendation function
|
120 |
def content_based_recommend(song_title, top_n=5):
|
@@ -126,17 +124,28 @@ def content_based_recommend(song_title, top_n=5):
|
|
126 |
except IndexError:
|
127 |
return pd.DataFrame(columns=['title', 'artist_name', 'release'])
|
128 |
|
|
|
129 |
def collaborative_recommend(user_id, top_n=5):
|
130 |
if user_id not in user_song_matrix.index:
|
131 |
return pd.DataFrame(columns=['title', 'artist_name', 'release'])
|
132 |
|
133 |
-
|
134 |
-
|
135 |
-
|
|
|
|
|
|
|
136 |
listened_songs = user_song_matrix.loc[user_id][user_song_matrix.loc[user_id] > 0].index
|
137 |
-
scores = {song: score for song, score in zip(user_song_matrix.columns, scores) if song not in listened_songs}
|
138 |
|
139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
recommended_song_ids = [song for song, _ in recommended_songs]
|
141 |
return df[df['song'].isin(recommended_song_ids)][['title', 'artist_name', 'release']].drop_duplicates()
|
142 |
|
|
|
2 |
import pandas as pd
|
3 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
4 |
from sklearn.neighbors import NearestNeighbors
|
|
|
|
|
5 |
|
6 |
# Page config
|
7 |
st.set_page_config(
|
|
|
106 |
|
107 |
# Collaborative Filtering
|
108 |
user_song_matrix = df.pivot_table(index='user', columns='song', values='play_count', fill_value=0)
|
109 |
+
knn_cf = NearestNeighbors(n_neighbors=10, metric='cosine', algorithm='auto')
|
110 |
+
knn_cf.fit(user_song_matrix)
|
|
|
111 |
|
112 |
+
return df, tfidf, tfidf_matrix, nn, user_song_matrix, knn_cf
|
113 |
|
114 |
+
df = load_data()
|
115 |
+
df, tfidf, tfidf_matrix, nn, user_song_matrix, knn_cf = run_imps(df)
|
116 |
|
117 |
# Content-based recommendation function
|
118 |
def content_based_recommend(song_title, top_n=5):
|
|
|
124 |
except IndexError:
|
125 |
return pd.DataFrame(columns=['title', 'artist_name', 'release'])
|
126 |
|
127 |
+
# Collaborative recommendation function using KNN
|
128 |
def collaborative_recommend(user_id, top_n=5):
    """Recommend songs the user has not played, based on similar users.

    Looks up the users closest to ``user_id`` with the fitted ``knn_cf``
    model, adds up their play-count values per song, drops every song the
    target user already has a positive value for, and returns the metadata of
    the highest-scoring songs, best first.

    Args:
        user_id: Row label of the target user in ``user_song_matrix``.
        top_n: Maximum number of neighbours to consult and of songs to rank.

    Returns:
        A DataFrame with columns ``title``, ``artist_name`` and ``release``,
        ordered by descending aggregated score. It is empty when the user is
        unknown, when ``top_n`` is below 1, or when the neighbours contribute
        no unheard song.
    """
    result_columns = ['title', 'artist_name', 'release']
    if top_n < 1 or user_id not in user_song_matrix.index:
        return pd.DataFrame(columns=result_columns)

    user_index = user_song_matrix.index.get_loc(user_id)
    user_plays = user_song_matrix.iloc[user_index]

    # kneighbors raises ValueError when asked for more neighbours than there
    # are fitted rows, so cap the request at the number of users.
    n_neighbors = min(top_n + 1, len(user_song_matrix))
    # Query with a one-row DataFrame: the model was fitted on a DataFrame, and
    # a bare ndarray triggers scikit-learn's feature-name mismatch warning.
    _, indices = knn_cf.kneighbors(
        user_song_matrix.iloc[[user_index]], n_neighbors=n_neighbors
    )

    # Drop the target user explicitly instead of assuming it is the first hit;
    # with identical user vectors the ordering of zero-distance ties is not
    # guaranteed.
    neighbors = [i for i in indices.flatten() if i != user_index][:top_n]
    if not neighbors:
        return pd.DataFrame(columns=result_columns)

    # Sum only positive values per song across the neighbours (same scoring as
    # a per-neighbour loop that skips non-positive entries).
    neighbor_totals = user_song_matrix.iloc[neighbors].clip(lower=0).sum(axis=0)
    unheard = ~(user_plays > 0)
    candidates = neighbor_totals[unheard & (neighbor_totals > 0)]
    if candidates.empty:
        return pd.DataFrame(columns=result_columns)

    # nlargest keeps the first occurrence (column order) on tied scores.
    recommended_song_ids = list(candidates.nlargest(top_n).index)

    # isin() alone returns rows in df order; reorder them by rank so the best
    # recommendation comes first. A positional stable argsort keeps this safe
    # even if df's index has duplicate labels.
    rank = {song: position for position, song in enumerate(recommended_song_ids)}
    matches = df[df['song'].isin(recommended_song_ids)]
    order = matches['song'].map(rank).to_numpy().argsort(kind='stable')
    return matches.iloc[order][result_columns].drop_duplicates()
|
151 |
|