Spaces:

ilkayisik
/

WBSFLIX

Runtime error

App Files Files Community

ilkayisik committed on Sep 9, 2022

Commit

9c7f8e2

•

1 Parent(s): cdf9a36

pop rec v2

Browse files

Files changed (1) hide show

app.py +40 -22

app.py CHANGED Viewed

@@ -61,34 +61,52 @@ rating_df['datetime'] = rating_df['timestamp'].apply(datetime.fromtimestamp)
 rating_df.drop(columns=['timestamp'], inplace=True)
 # %% DEFINE FUNCTIONS
 # to make the dataframe look nicer
 def make_pretty(styler):
     """Set the recommendations caption on `styler` and return it.

     Used in this file as the callback passed to `DataFrame.style.pipe(...)`,
     so `styler` is presumably a pandas Styler -- confirm against callers.
     """
     styler.set_caption("Top movie recommendations for you")
     # Optional colour gradient, currently disabled.
     # styler.background_gradient(cmap="YlGnBu")
     return styler
-# population based
-def popular_n_movies(n, genre):
-    """Return a styled table of popular movies, optionally filtered by genre.
-
-    n: number of rows requested; also the number of 'movie-N' index labels built.
-    genre: text looked for in the `genres` column; 'Any' disables the filter.
-
-    Reads the module-level `rating_df` and `movie_df` frames and returns
-    `df_rec.style` piped through `make_pretty`.
-    """
-    # Per-movie mean rating, rating count and mean rating datetime, sorted
-    # descending; only movies whose count reaches the midpoint between the
-    # mean and the median count are kept, and their ids are collected.
-    popular_n = (
-    rating_df
-            .groupby(by='movieId')
-            .agg(rating_mean=('rating', 'mean'),
-                 rating_count=('movieId', 'count'),
-                 datetime=('datetime','mean'))
-            .sort_values(['rating_mean','rating_count','datetime'], ascending= False)
-            .loc[lambda df_ :df_['rating_count'] >= (df_['rating_count'].mean() + df_['rating_count'].median())/2]
-            .reset_index()
-    )['movieId'].to_list()
-    # NOTE(review): isin() selects by membership, so the rows presumably come
-    # back in movie_df's own order rather than the sorted order above -- confirm.
-    result = movie_df.loc[lambda df_ : df_['movieId'].isin(popular_n)]
-    if genre != 'Any':
-            result = result.loc[lambda df_ : df_['genres'].str.contains(genre)]
-    df_rec = result.head(n).reset_index(drop=True)
-    df_rec = df_rec[['title', 'genres', 'year']].reset_index(drop=True)
-    # NOTE(review): always builds n labels; looks like the index assignment
-    # would fail when fewer than n rows remain after filtering -- confirm.
-    new_index = ['movie-{}'.format(i+1) for i in range(n)]
-    df_rec.index = new_index
-    pretty_rec = df_rec.style.pipe(make_pretty)
-    return pretty_rec
 # movie/item based
 def item_n_movies(movie_name, n):

 rating_df.drop(columns=['timestamp'], inplace=True)
 # %% DEFINE FUNCTIONS
 # to make the dataframe look nicer
 def make_pretty(styler):
     """Set the recommendations caption on `styler` and return it.

     Used in this file as the callback passed to `DataFrame.style.pipe(...)`,
     so `styler` is presumably a pandas Styler -- confirm against callers.
     """
     styler.set_caption("Top movie recommendations for you")
     # Optional colour gradient, currently disabled.
     # styler.background_gradient(cmap="YlGnBu")
     return styler
+# population based: v1
+# def popular_n_movies(n, genre):
+#     popular_n = (
+#     rating_df
+#             .groupby(by='movieId')
+#             .agg(rating_mean=('rating', 'mean'),
+#                  rating_count=('movieId', 'count'),
+#                  datetime=('datetime','mean'))
+#             .sort_values(['rating_mean','rating_count','datetime'], ascending= False)
+#             .loc[lambda df_ :df_['rating_count'] >= (df_['rating_count'].mean() + df_['rating_count'].median())/2]
+#             .reset_index()
+#     )['movieId'].to_list()
+#     result = movie_df.loc[lambda df_ : df_['movieId'].isin(popular_n)]
+#     if genre != 'Any':
+#             result = result.loc[lambda df_ : df_['genres'].str.contains(genre)]
+#     df_rec = result.head(n).reset_index(drop=True)
+#     df_rec = df_rec[['title', 'genres', 'year']].reset_index(drop=True)
+#     new_index = ['movie-{}'.format(i+1) for i in range(n)]
+#     df_rec.index = new_index
+#     pretty_rec = df_rec.style.pipe(make_pretty)
+#     return pretty_rec
+# population_based v2
+def popular_n_movies(n, genres):
+    if genres == "Any":
+        genres = ""
+    recommendations = (
+        rating_df
+            .groupby('movieId')
+            .agg(avg_rating = ('rating', 'mean'), num_ratings = ('rating', 'count'))
+            .merge(movie_df, on='movieId')
+            .assign(combined_rating = lambda x: x['avg_rating'] * x['num_ratings']**0.5)
+            [lambda df: df["genres"].str.contains(genres, regex=True)]
+            .sort_values('combined_rating', ascending=False)
+            .head(n)
+            [['title', 'avg_rating', 'genres']]
+            .rename(columns= {'title': 'Movie Title', 'avg_rating': 'Average Rating', 'genres': 'Genres'})
+    )
+    pretty_recommendations = recommendations.style.pipe(make_pretty)
+    return pretty_recommendations
 # movie/item based
 def item_n_movies(movie_name, n):