ilkayisik committed on
Commit
9c7f8e2
1 Parent(s): cdf9a36

pop rec v2

Browse files
Files changed (1) hide show
  1. app.py +40 -22
app.py CHANGED
@@ -61,34 +61,52 @@ rating_df['datetime'] = rating_df['timestamp'].apply(datetime.fromtimestamp)
61
  rating_df.drop(columns=['timestamp'], inplace=True)
62
  # %% DEFINE FUNCTIONS
63
 
64
-
65
  # to make the dataframe look nicer
66
  def make_pretty(styler):
67
  styler.set_caption("Top movie recommendations for you")
68
  # styler.background_gradient(cmap="YlGnBu")
69
  return styler
70
 
71
- # population based
72
- def popular_n_movies(n, genre):
73
- popular_n = (
74
- rating_df
75
- .groupby(by='movieId')
76
- .agg(rating_mean=('rating', 'mean'),
77
- rating_count=('movieId', 'count'),
78
- datetime=('datetime','mean'))
79
- .sort_values(['rating_mean','rating_count','datetime'], ascending= False)
80
- .loc[lambda df_ :df_['rating_count'] >= (df_['rating_count'].mean() + df_['rating_count'].median())/2]
81
- .reset_index()
82
- )['movieId'].to_list()
83
- result = movie_df.loc[lambda df_ : df_['movieId'].isin(popular_n)]
84
- if genre != 'Any':
85
- result = result.loc[lambda df_ : df_['genres'].str.contains(genre)]
86
- df_rec = result.head(n).reset_index(drop=True)
87
- df_rec = df_rec[['title', 'genres', 'year']].reset_index(drop=True)
88
- new_index = ['movie-{}'.format(i+1) for i in range(n)]
89
- df_rec.index = new_index
90
- pretty_rec = df_rec.style.pipe(make_pretty)
91
- return pretty_rec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  # movie/item based
94
  def item_n_movies(movie_name, n):
 
61
  rating_df.drop(columns=['timestamp'], inplace=True)
62
  # %% DEFINE FUNCTIONS
63
 
 
64
  # to make the dataframe look nicer
65
  def make_pretty(styler):
66
  # Styling hook: it is passed to `.style.pipe(...)` by the recommender
  # functions, receives the Styler object and returns that same object.
  # Sets the caption text for the styled recommendations table.
  styler.set_caption("Top movie recommendations for you")
67
  # styler.background_gradient(cmap="YlGnBu")
68
  # Return the styler that was passed in so `.pipe` yields the Styler.
  return styler
69
 
70
+ # population based: v1
71
+ # def popular_n_movies(n, genre):
72
+ # popular_n = (
73
+ # rating_df
74
+ # .groupby(by='movieId')
75
+ # .agg(rating_mean=('rating', 'mean'),
76
+ # rating_count=('movieId', 'count'),
77
+ # datetime=('datetime','mean'))
78
+ # .sort_values(['rating_mean','rating_count','datetime'], ascending= False)
79
+ # .loc[lambda df_ :df_['rating_count'] >= (df_['rating_count'].mean() + df_['rating_count'].median())/2]
80
+ # .reset_index()
81
+ # )['movieId'].to_list()
82
+ # result = movie_df.loc[lambda df_ : df_['movieId'].isin(popular_n)]
83
+ # if genre != 'Any':
84
+ # result = result.loc[lambda df_ : df_['genres'].str.contains(genre)]
85
+ # df_rec = result.head(n).reset_index(drop=True)
86
+ # df_rec = df_rec[['title', 'genres', 'year']].reset_index(drop=True)
87
+ # new_index = ['movie-{}'.format(i+1) for i in range(n)]
88
+ # df_rec.index = new_index
89
+ # pretty_rec = df_rec.style.pipe(make_pretty)
90
+ # return pretty_rec
91
+
92
+ # population_based v2
93
def popular_n_movies(n, genres):
    """Return the top ``n`` population-based recommendations as a styled table.

    Ratings are aggregated per ``movieId`` into a mean rating and a rating
    count, joined with ``movie_df``, and ranked by
    ``avg_rating * sqrt(num_ratings)``.

    Args:
        n: Maximum number of movies to return.
        genres: Pattern matched against the ``genres`` column with
            ``str.contains(..., regex=True)``. The value ``"Any"`` disables
            the genre filter.

    Returns:
        The result of ``DataFrame.style.pipe(make_pretty)`` for a frame with
        the columns ``Movie Title``, ``Average Rating`` and ``Genres``.
    """
    # An empty pattern is contained in every string, so "Any" keeps all rows.
    pattern = "" if genres == "Any" else genres
    recommendations = (
        rating_df
        .groupby('movieId')
        .agg(avg_rating=('rating', 'mean'), num_ratings=('rating', 'count'))
        .merge(movie_df, on='movieId')
        .assign(
            combined_rating=lambda x: x['avg_rating'] * x['num_ratings'] ** 0.5
        )
        # na=False: a row whose genres value is missing is treated as a
        # non-match; without it the mask contains NaN and the boolean
        # indexing raises.
        [lambda df: df["genres"].str.contains(pattern, regex=True, na=False)]
        .sort_values('combined_rating', ascending=False)
        .head(n)
        [['title', 'avg_rating', 'genres']]
        .rename(columns={
            'title': 'Movie Title',
            'avg_rating': 'Average Rating',
            'genres': 'Genres',
        })
    )
    pretty_recommendations = recommendations.style.pipe(make_pretty)
    return pretty_recommendations
110
 
111
  # movie/item based
112
  def item_n_movies(movie_name, n):