pop rec v2
Browse files
app.py
CHANGED
@@ -61,34 +61,52 @@ rating_df['datetime'] = rating_df['timestamp'].apply(datetime.fromtimestamp)
|
|
61 |
rating_df.drop(columns=['timestamp'], inplace=True)
|
62 |
# %% DEFINE FUNCTIONS
|
63 |
|
64 |
-
|
65 |
# to make the dataframe look nicer
|
66 |
def make_pretty(styler):
    """Attach the recommendations caption to ``styler`` and hand it back.

    Written to be used with ``Styler.pipe``: the same object that comes in
    is returned after its caption has been set.
    """
    caption = "Top movie recommendations for you"
    styler.set_caption(caption)
    # Gradient colouring is currently switched off:
    # styler.background_gradient(cmap="YlGnBu")
    return styler
|
70 |
|
71 |
-
# population based
|
72 |
-
def popular_n_movies(n, genre):
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
|
93 |
# movie/item based
|
94 |
def item_n_movies(movie_name, n):
|
|
|
61 |
rating_df.drop(columns=['timestamp'], inplace=True)
|
62 |
# %% DEFINE FUNCTIONS
|
63 |
|
|
|
64 |
# to make the dataframe look nicer
|
65 |
def make_pretty(styler):
    """Attach the recommendations caption to ``styler`` and hand it back.

    Written to be used with ``Styler.pipe``: the same object that comes in
    is returned after its caption has been set.
    """
    caption = "Top movie recommendations for you"
    styler.set_caption(caption)
    # Gradient colouring is currently switched off:
    # styler.background_gradient(cmap="YlGnBu")
    return styler
|
69 |
|
70 |
+
# population based: v1
|
71 |
+
# def popular_n_movies(n, genre):
|
72 |
+
# popular_n = (
|
73 |
+
# rating_df
|
74 |
+
# .groupby(by='movieId')
|
75 |
+
# .agg(rating_mean=('rating', 'mean'),
|
76 |
+
# rating_count=('movieId', 'count'),
|
77 |
+
# datetime=('datetime','mean'))
|
78 |
+
# .sort_values(['rating_mean','rating_count','datetime'], ascending= False)
|
79 |
+
# .loc[lambda df_ :df_['rating_count'] >= (df_['rating_count'].mean() + df_['rating_count'].median())/2]
|
80 |
+
# .reset_index()
|
81 |
+
# )['movieId'].to_list()
|
82 |
+
# result = movie_df.loc[lambda df_ : df_['movieId'].isin(popular_n)]
|
83 |
+
# if genre != 'Any':
|
84 |
+
# result = result.loc[lambda df_ : df_['genres'].str.contains(genre)]
|
85 |
+
# df_rec = result.head(n).reset_index(drop=True)
|
86 |
+
# df_rec = df_rec[['title', 'genres', 'year']].reset_index(drop=True)
|
87 |
+
# new_index = ['movie-{}'.format(i+1) for i in range(n)]
|
88 |
+
# df_rec.index = new_index
|
89 |
+
# pretty_rec = df_rec.style.pipe(make_pretty)
|
90 |
+
# return pretty_rec
|
91 |
+
|
92 |
+
# population based: v2
|
93 |
+
def popular_n_movies(n, genres):
    """Return the ``n`` most popular movies, styled for display.

    Ratings in the module-level ``rating_df`` are aggregated per ``movieId``
    into an average rating and a rating count, joined with ``movie_df`` on
    ``movieId``, and ranked by ``avg_rating * num_ratings ** 0.5`` so that
    the number of ratings contributes to the score alongside the average.

    Args:
        n: Maximum number of movies to return.
        genres: Regular-expression pattern matched against the ``genres``
            column of the joined table. The literal value ``"Any"``
            disables the genre filter.

    Returns:
        The result of piping the table's ``Styler`` through ``make_pretty``.
        The table has the columns ``Movie Title``, ``Average Rating`` and
        ``Genres``.
    """
    # An empty pattern matches every string, which switches the filter off.
    pattern = "" if genres == "Any" else genres

    per_movie = (
        rating_df
        .groupby('movieId')
        .agg(avg_rating=('rating', 'mean'), num_ratings=('rating', 'count'))
    )
    scored = (
        per_movie
        .merge(movie_df, on='movieId')
        .assign(combined_rating=lambda x: x['avg_rating'] * x['num_ratings'] ** 0.5)
    )

    # na=False: a missing genres value would otherwise produce NaN in the
    # mask, and a mask containing NaN cannot be used for boolean indexing.
    genre_mask = scored['genres'].str.contains(pattern, regex=True, na=False)

    recommendations = (
        scored
        .loc[genre_mask]
        .sort_values('combined_rating', ascending=False)
        .head(n)
        [['title', 'avg_rating', 'genres']]
        .rename(columns={'title': 'Movie Title', 'avg_rating': 'Average Rating', 'genres': 'Genres'})
    )
    return recommendations.style.pipe(make_pretty)
|
110 |
|
111 |
# movie/item based
|
112 |
def item_n_movies(movie_name, n):
|