mr-robber committed on
Commit
bc6208b
1 Parent(s): 2c50f05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +131 -5
app.py CHANGED
@@ -3,10 +3,116 @@ from bs4 import BeautifulSoup
3
  import requests
4
  import re
5
  import pandas as pd
6
-
 
 
 
 
7
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
 
 
 
 
 
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  def get_video_address(row):
12
 
@@ -99,10 +205,18 @@ headers = {
99
  "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiJmMTNkMmNiMTY4MWM3ZWEyMjNiYjgxZTUyYmMzMDUyMCIsInN1YiI6IjY0YTY5MTU1YzNiZmZlMDBjODZiYzRlZSIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.H6zk_gSiudNdZTsgwYHFQnJiVXfR_BX5DfYNIoIsRv8"
100
  }
101
 
102
-
 
103
  def recommend_movie(movie_name, Number_of_Recommendation):
 
 
104
  Number_of_Recommendation = int(Number_of_Recommendation)
105
- outputIds = ['tt0114709', 'tt0113497', 'tt0113228', 'tt0114885', 'tt0113041']
 
 
 
 
 
106
  html = '''
107
  <html>
108
  <head>
@@ -124,10 +238,17 @@ def recommend_movie(movie_name, Number_of_Recommendation):
124
  video_response = requests.get(url, headers=headers)
125
  video = video_response.json()
126
  video = get_video_address(video['results'])
127
-
 
 
 
 
 
128
  url = "https://api.themoviedb.org/3/movie/{id}/images".format(id = outputIds[i])
129
  image_response = requests.get(url, headers=headers)
130
  image = image_response.json()
 
 
131
  html +='''
132
  <div class="bdy">
133
  <div class = "top">
@@ -147,11 +268,13 @@ def recommend_movie(movie_name, Number_of_Recommendation):
147
  <div class="contents">
148
  <img src='https://image.tmdb.org/t/p/w500'''+image['backdrops'][0]['file_path']+'''' class="img">
149
  <iframe
150
- src="https://www.youtube.com/embed/'''+video[0]+'''" class="video">
151
  </iframe>
152
  </div>
153
  <h3>
154
  '''+ data['overview']+'''
 
 
155
  </h3>
156
  </div>
157
  </div>
@@ -166,6 +289,7 @@ def recommend_movie(movie_name, Number_of_Recommendation):
166
 
167
  html = ''
168
 
 
169
  iface = gr.Interface(fn=recommend_movie,
170
  inputs=[gr.Dataframe(headers=["Name", "Rate"],
171
  datatype=["str", "number"],
@@ -173,6 +297,7 @@ iface = gr.Interface(fn=recommend_movie,
173
  col_count=(2, "fixed")),
174
  "number"
175
  ],
 
176
  outputs=gr.HTML(html),
177
  title="Movie Recommender",
178
  description="Enter a movie name and your rating (out of 10) for the movie. you must enter at least 3 movies and all words must start with capital letters, example : Grumpier Old Men",
@@ -181,3 +306,4 @@ iface = gr.Interface(fn=recommend_movie,
181
  )
182
 
183
  iface.launch()
 
 
3
  import requests
4
  import re
5
  import pandas as pd
6
+ import numpy as np
7
+ import pandas as pd
8
+ import matplotlib.pyplot as plt
9
+ import seaborn as sns
10
+ import scipy.stats
11
  import gradio as gr
12
+ from sklearn.metrics.pairwise import cosine_similarity
13
+
14
+ from sklearn.feature_extraction.text import TfidfVectorizer
15
+ from datasketch import MinHashLSHForest, MinHash
16
+ import pandas as pd
17
+ import random
18
+
19
class ContentBasedRecommender:
    """Recommend movies whose overview text resembles that of a given title.

    ``content_based_recommendation`` must be called once before any query:
    it loads the metadata CSV, vectorises the ``overview`` column with
    TF-IDF, MinHashes the set of non-zero vocabulary ids of every row and
    indexes those signatures in a MinHash LSH forest.
    """

    def __init__(self, movies_metadata_path):
        """Remember the CSV path; the heavy state is built lazily.

        Args:
            movies_metadata_path: Path of a CSV with at least the columns
                ``title`` and ``overview``.
        """
        self.movies_metadata_path = movies_metadata_path
        self.indices = None    # Series: title -> row label in df2
        self.df2 = None        # metadata DataFrame
        self.forest = None     # MinHashLSHForest over the overviews
        self.minhashes = None  # list of (row label, MinHash) tuples

    def get_recommendations(self, title, top_k):
        """Return up to ``top_k`` titles that are near neighbours of ``title``.

        Args:
            title: Movie title exactly as it appears in the ``title`` column.
            top_k: Maximum number of neighbours to return.

        Returns:
            A pandas Series of titles. It is empty when ``title`` is not in
            the index, so one misspelt input does not abort the whole
            request.
        """
        if title not in self.indices:
            return self.df2['title'].iloc[[]]

        idx = self.indices[title]
        # A label lookup returns a Series when the title occurs more than
        # once; use the first occurrence in that case.
        if isinstance(idx, pd.Series):
            idx = idx.iloc[0]

        query_minhash = self.minhashes[idx][1]
        # Ask for one extra neighbour: the query movie itself is normally
        # part of the answer and is filtered out below.
        nearest_neighbors = self.forest.query(query_minhash, top_k + 1)
        movie_indices = [int(nn) for nn in nearest_neighbors if nn != idx][:top_k]
        return self.df2['title'].iloc[movie_indices]

    def final_recommends(self, movies, result_number):
        """Pool the neighbours of several titles and sample from the pool.

        Args:
            movies: Iterable of movie titles (list or pandas Series).
            result_number: Maximum number of titles to return.

        Returns:
            A list of at most ``result_number`` titles in random order.
        """
        res = []
        for title in movies:
            recommendations = self.get_recommendations(title, top_k=10)
            # At most nine per input title; slicing copes with shorter
            # (or empty) neighbour lists instead of raising IndexError.
            res.extend(recommendations.iloc[:9].tolist())

        random.shuffle(res)
        return res[:result_number]

    def content_based_recommendation(self):
        """Load the metadata and build the MinHash LSH forest and title index."""
        self.df2 = pd.read_csv(self.movies_metadata_path)
        tfidf = TfidfVectorizer(stop_words='english')
        self.df2['overview'] = self.df2['overview'].fillna('')
        tfidf_matrix = tfidf.fit_transform(self.df2['overview'])

        # NOTE: get_recommendations indexes this list by row label, which
        # matches the list position only because read_csv yields the
        # default 0..n-1 index.
        self.minhashes = []
        for i in range(tfidf_matrix.shape[0]):
            vector = tfidf_matrix[i]
            doc_id = self.df2.index[i]
            minhash = MinHash(num_perm=128)
            # The signature is built from the ids of the vocabulary terms
            # present in the overview; the TF-IDF weights are not used.
            for token in vector.nonzero()[1]:
                minhash.update(str(token).encode('utf-8'))
            self.minhashes.append((doc_id, minhash))

        self.forest = MinHashLSHForest(num_perm=128)
        for doc_id, minhash in self.minhashes:
            self.forest.add(doc_id, minhash)
        self.forest.index()

        # Keep one row per title so that a title lookup yields a scalar.
        titles = pd.Series(self.df2.index, index=self.df2['title'])
        self.indices = titles[~titles.index.duplicated(keep='first')]
63
def _frequently_rated(frame, min_ratings):
    """Return the rows of ``frame`` whose movie has more than ``min_ratings`` ratings."""
    counts = frame["movieId"].value_counts()
    return frame[frame["movieId"].isin(counts[counts > min_ratings].index)]


def colabrative(user_movies, ret_number, ratings=None, movie_df=None, min_ratings=1000):
    """User-based collaborative filtering for one ad-hoc user.

    The caller's ratings are added to the rating table under a synthetic
    user id. Users who rated more than 60 % of the same movies and whose
    ratings correlate above 0.65 with the caller's are selected, and movies
    are ranked by the mean correlation-weighted rating of those users.

    Args:
        user_movies: Iterable of ``(movieId, rating)`` pairs; ratings are
            expected on the doubled scale used internally (the stored
            ratings are multiplied by 2 before use).
        ret_number: Maximum number of movie ids to return.
        ratings: Optional DataFrame with columns ``userId``, ``movieId`` and
            ``rating``. When omitted the Kaggle ``ratings.csv`` is read.
            The frame passed in is not modified.
        movie_df: Optional DataFrame with a ``movieId`` column; when given,
            results are restricted to ids present in it via an inner merge.
        min_ratings: A movie is considered only when it has more than this
            many ratings.

    Returns:
        ``[]`` when three or fewer of the caller's movies are frequently
        rated; otherwise a pandas Series of movie ids, best first.
    """
    new_user_id = 300000  # synthetic id for the ad-hoc user

    if ratings is None:
        ratings = pd.read_csv('/kaggle/input/the-movies-dataset/ratings.csv')
    else:
        ratings = ratings.copy()
    ratings['rating'] = (ratings['rating'] * 2).astype(int)

    # Membership must be tested against the id values; ``in`` on a Series
    # checks its index labels instead.
    known_ids = set(_frequently_rated(ratings, min_ratings)["movieId"])
    user_df = {'userId': [], 'movieId': [], 'rating': []}
    for user_movie in user_movies:
        if user_movie[0] in known_ids:
            user_df['userId'].append(new_user_id)
            user_df['movieId'].append(user_movie[0])
            user_df['rating'].append(user_movie[1])
    # NOTE(review): this requires at least four matched movies, while the
    # UI text asks for "at least 3" - confirm which one is intended.
    if len(user_df['movieId']) <= 3:
        return []

    # DataFrame.append returned a new frame (and no longer exists in
    # pandas 2); concat and rebind so the user's rows are really included.
    ratings = pd.concat([ratings, pd.DataFrame(user_df)], ignore_index=True)

    common_movies = _frequently_rated(ratings, min_ratings)
    user_movie_df = common_movies.pivot_table(index=["userId"], columns=["movieId"], values='rating')

    target_row = user_movie_df[user_movie_df.index == new_user_id]
    movies_watched = target_row.columns[target_row.notna().any()].tolist()
    movies_watched_df = user_movie_df[movies_watched]

    # Keep users who rated more than 60 % of the caller's movies.
    overlap = movies_watched_df.notnull().sum(axis=1)
    perc = len(movies_watched) * 60 / 100
    final_df = movies_watched_df[movies_watched_df.index.isin(overlap[overlap > perc].index)]

    # Read the caller's column of the correlation matrix directly; dropping
    # duplicate correlation values could discard the caller's own pairs.
    user_corr = final_df.T.corr()[new_user_id].drop(labels=[new_user_id])
    top_users = (
        user_corr[user_corr > 0.65]
        .rename("corr")
        .rename_axis("userId")
        .reset_index()
    )

    top_users_score = top_users.merge(ratings[["userId", "movieId", "rating"]], how="inner")
    top_users_score["weighted_rating"] = top_users_score["corr"] * top_users_score["rating"]
    recommendation_df = top_users_score.groupby("movieId").agg({"weighted_rating": "mean"}).reset_index()
    movies_to_be_recommended = recommendation_df[
        recommendation_df["weighted_rating"] > 3.5
    ].sort_values("weighted_rating", ascending=False)

    if movie_df is not None:
        movies_to_be_recommended = movies_to_be_recommended.merge(movie_df[["movieId"]])
    return movies_to_be_recommended["movieId"][:ret_number]
116
 
117
  def get_video_address(row):
118
 
 
205
  "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiJmMTNkMmNiMTY4MWM3ZWEyMjNiYjgxZTUyYmMzMDUyMCIsInN1YiI6IjY0YTY5MTU1YzNiZmZlMDBjODZiYzRlZSIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.H6zk_gSiudNdZTsgwYHFQnJiVXfR_BX5DfYNIoIsRv8"
206
  }
207
 
208
+ recommender = ContentBasedRecommender('movies_metadata.csv')
209
+ recommender.content_based_recommendation()
210
  def recommend_movie(movie_name, Number_of_Recommendation):
211
+ movies = pd.read_csv('/kaggle/input/the-movies-dataset/movies_metadata.csv')
212
+
213
  Number_of_Recommendation = int(Number_of_Recommendation)
214
+ names = recommends = recommender.final_recommends(movies= movie_name['Name'], result_number=Number_of_Recommendation)
215
+ outputIds = []
216
+ for i in range(len(names)):
217
+ # print(movies[movies['title'] == names[i]]['imdb_id'].iloc[0])
218
+ outputIds.append(movies[movies['title'] == names[i]]['imdb_id'].iloc[0])
219
+ # outputIds = ['tt0114709', 'tt0113497', 'tt0113228', 'tt0114885', 'tt0113041']
220
  html = '''
221
  <html>
222
  <head>
 
238
  video_response = requests.get(url, headers=headers)
239
  video = video_response.json()
240
  video = get_video_address(video['results'])
241
+ if type(video) == bool:
242
+ video = []
243
+ for dictionary in video['results']:
244
+ video.append(dictionary['key'])
245
+ if len(video) == 0:
246
+ video = ['']
247
  url = "https://api.themoviedb.org/3/movie/{id}/images".format(id = outputIds[i])
248
  image_response = requests.get(url, headers=headers)
249
  image = image_response.json()
250
+ if len(image['backdrops']) == 0:
251
+ image['backdrops'] = [{'file_path':''}]
252
  html +='''
253
  <div class="bdy">
254
  <div class = "top">
 
268
  <div class="contents">
269
  <img src='https://image.tmdb.org/t/p/w500'''+image['backdrops'][0]['file_path']+'''' class="img">
270
  <iframe
271
+ src="https://www.youtube.com/embed/'''+video[0]+'''" class="video" height = 200px>
272
  </iframe>
273
  </div>
274
  <h3>
275
  '''+ data['overview']+'''
276
+
277
+
278
  </h3>
279
  </div>
280
  </div>
 
289
 
290
  html = ''
291
 
292
+
293
  iface = gr.Interface(fn=recommend_movie,
294
  inputs=[gr.Dataframe(headers=["Name", "Rate"],
295
  datatype=["str", "number"],
 
297
  col_count=(2, "fixed")),
298
  "number"
299
  ],
300
+
301
  outputs=gr.HTML(html),
302
  title="Movie Recommender",
303
  description="Enter a movie name and your rating (out of 10) for the movie. you must enter at least 3 movies and all words must start with capital letters, example : Grumpier Old Men",
 
306
  )
307
 
308
  iface.launch()
309
+