Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -3,10 +3,116 @@ from bs4 import BeautifulSoup
|
|
3 |
import requests
|
4 |
import re
|
5 |
import pandas as pd
|
6 |
-
|
|
|
|
|
|
|
|
|
7 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
def get_video_address(row):
|
12 |
|
@@ -99,10 +205,18 @@ headers = {
|
|
99 |
"Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiJmMTNkMmNiMTY4MWM3ZWEyMjNiYjgxZTUyYmMzMDUyMCIsInN1YiI6IjY0YTY5MTU1YzNiZmZlMDBjODZiYzRlZSIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.H6zk_gSiudNdZTsgwYHFQnJiVXfR_BX5DfYNIoIsRv8"
|
100 |
}
|
101 |
|
102 |
-
|
|
|
103 |
def recommend_movie(movie_name, Number_of_Recommendation):
|
|
|
|
|
104 |
Number_of_Recommendation = int(Number_of_Recommendation)
|
105 |
-
|
|
|
|
|
|
|
|
|
|
|
106 |
html = '''
|
107 |
<html>
|
108 |
<head>
|
@@ -124,10 +238,17 @@ def recommend_movie(movie_name, Number_of_Recommendation):
|
|
124 |
video_response = requests.get(url, headers=headers)
|
125 |
video = video_response.json()
|
126 |
video = get_video_address(video['results'])
|
127 |
-
|
|
|
|
|
|
|
|
|
|
|
128 |
url = "https://api.themoviedb.org/3/movie/{id}/images".format(id = outputIds[i])
|
129 |
image_response = requests.get(url, headers=headers)
|
130 |
image = image_response.json()
|
|
|
|
|
131 |
html +='''
|
132 |
<div class="bdy">
|
133 |
<div class = "top">
|
@@ -147,11 +268,13 @@ def recommend_movie(movie_name, Number_of_Recommendation):
|
|
147 |
<div class="contents">
|
148 |
<img src='https://image.tmdb.org/t/p/w500'''+image['backdrops'][0]['file_path']+'''' class="img">
|
149 |
<iframe
|
150 |
-
src="https://www.youtube.com/embed/'''+video[0]+'''" class="video">
|
151 |
</iframe>
|
152 |
</div>
|
153 |
<h3>
|
154 |
'''+ data['overview']+'''
|
|
|
|
|
155 |
</h3>
|
156 |
</div>
|
157 |
</div>
|
@@ -166,6 +289,7 @@ def recommend_movie(movie_name, Number_of_Recommendation):
|
|
166 |
|
167 |
html = ''
|
168 |
|
|
|
169 |
iface = gr.Interface(fn=recommend_movie,
|
170 |
inputs=[gr.Dataframe(headers=["Name", "Rate"],
|
171 |
datatype=["str", "number"],
|
@@ -173,6 +297,7 @@ iface = gr.Interface(fn=recommend_movie,
|
|
173 |
col_count=(2, "fixed")),
|
174 |
"number"
|
175 |
],
|
|
|
176 |
outputs=gr.HTML(html),
|
177 |
title="Movie Recommender",
|
178 |
description="Enter a movie name and your rating (out of 10) for the movie. you must enter at least 3 movies and all words must start with capital letters, example : Grumpier Old Men",
|
@@ -181,3 +306,4 @@ iface = gr.Interface(fn=recommend_movie,
|
|
181 |
)
|
182 |
|
183 |
iface.launch()
|
|
|
|
3 |
import requests
|
4 |
import re
|
5 |
import pandas as pd
|
6 |
+
import numpy as np
|
7 |
+
import pandas as pd
|
8 |
+
import matplotlib.pyplot as plt
|
9 |
+
import seaborn as sns
|
10 |
+
import scipy.stats
|
11 |
import gradio as gr
|
12 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
13 |
+
|
14 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
15 |
+
from datasketch import MinHashLSHForest, MinHash
|
16 |
+
import pandas as pd
|
17 |
+
import random
|
18 |
+
|
19 |
+
class ContentBasedRecommender:
    """Content-based movie recommender built on MinHash signatures of overviews.

    Call ``content_based_recommendation()`` once to load the metadata CSV and
    build the LSH forest; afterwards ``get_recommendations`` and
    ``final_recommends`` can be queried.
    """

    def __init__(self, movies_metadata_path):
        """Store the CSV path; nothing is loaded until the index is built."""
        self.movies_metadata_path = movies_metadata_path
        self.indices = None    # title -> row position in ``df2``
        self.df2 = None        # metadata frame read from the CSV
        self.forest = None     # MinHashLSHForest over all overviews
        self.minhashes = None  # list of (doc_id, MinHash), one per row

    def get_recommendations(self, title, top_k):
        """Return up to ``top_k`` titles whose overview is closest to ``title``.

        Raises:
            KeyError: if ``title`` is not present in the loaded metadata.
        """
        idx = self.indices[title]
        query_minhash = self.minhashes[idx][1]
        # The query movie is normally its own nearest neighbour, so ask for
        # one extra result and filter it out below.
        nearest_neighbors = self.forest.query(query_minhash, top_k + 1)
        movie_indices = [int(nn) for nn in nearest_neighbors if nn != idx][:top_k]
        return self.df2['title'].iloc[movie_indices]

    def final_recommends(self, movies, result_number):
        """Pool recommendations for several titles and return a random sample.

        Args:
            movies: iterable of movie titles (list or pandas Series).
            result_number: maximum number of titles to return.

        Returns:
            A shuffled list of at most ``result_number`` titles. Titles that
            are not in the metadata (for example empty input rows) are skipped.
        """
        res = []
        for title in movies:
            if title not in self.indices.index:
                continue
            recommendations = self.get_recommendations(title, top_k=10)
            # Take at most nine per input movie; the forest may return fewer,
            # so slice instead of indexing fixed positions.
            res.extend(recommendations.iloc[:9].tolist())

        random.shuffle(res)
        return res[:result_number]

    def content_based_recommendation(self):
        """Load the metadata CSV and build the MinHash LSH forest."""
        self.df2 = pd.read_csv(self.movies_metadata_path)
        tfidf = TfidfVectorizer(stop_words='english')
        self.df2['overview'] = self.df2['overview'].fillna('')
        tfidf_matrix = tfidf.fit_transform(self.df2['overview'])

        self.minhashes = []
        self.forest = MinHashLSHForest(num_perm=128)
        for i in range(tfidf_matrix.shape[0]):
            doc_id = self.df2.index[i]
            minhash = MinHash(num_perm=128)
            # Hash the vocabulary ids of the terms present in this overview.
            for token in tfidf_matrix[i].nonzero()[1]:
                minhash.update(str(token).encode('utf-8'))
            self.minhashes.append((doc_id, minhash))
            self.forest.add(doc_id, minhash)
        # The forest is not searchable until index() has been called.
        self.forest.index()

        indices = pd.Series(self.df2.index, index=self.df2['title'])
        # Keep the first row for each title so a title lookup yields a single
        # position rather than a Series of positions.
        self.indices = indices[~indices.index.duplicated(keep='first')]
|
63 |
+
def colabrative(user_movies, ret_number):
    """Recommend movie ids with user-based collaborative filtering.

    The caller's ratings are added to the ratings file as a synthetic user,
    users with similar taste are found by rating correlation, and movies are
    ranked by the correlation-weighted mean rating of those users.

    Args:
        user_movies: iterable of ``(movieId, rating)`` pairs; each item is
            read as ``item[0]`` and ``item[1]``.
        ret_number: maximum number of movie ids to return.

    Returns:
        ``[]`` when fewer than three of the given movies are among the
        frequently rated ones; otherwise a pandas Series of at most
        ``ret_number`` movie ids, best first.
    """
    new_user_id = 300000          # id assigned to the synthetic user
    min_ratings_per_movie = 1000  # movies with this many ratings or fewer are ignored

    ratings = pd.read_csv('/kaggle/input/the-movies-dataset/ratings.csv')
    # Double the ratings and truncate them to integers.
    ratings['rating'] = (ratings['rating'] * 2).astype(int)

    # Work on the value_counts Series directly: the column name it gets inside
    # a DataFrame differs between pandas versions.
    rating_counts = ratings['movieId'].value_counts()
    rare_movies = rating_counts[rating_counts <= min_ratings_per_movie].index
    common_movies = ratings[~ratings['movieId'].isin(rare_movies)]
    # Membership must be tested against the ids themselves; ``in`` on a Series
    # would test its index labels instead.
    common_ids = set(rating_counts.index[rating_counts > min_ratings_per_movie])

    user_rows = [
        {'userId': new_user_id, 'movieId': user_movie[0], 'rating': user_movie[1]}
        for user_movie in user_movies
        if user_movie[0] in common_ids
    ]
    # At least three usable movies are required.
    if len(user_rows) < 3:
        return []

    # Only already-common movies are added, so the rare/common split does not
    # need to be recomputed after inserting the new user's rows.
    common_movies = pd.concat(
        [common_movies, pd.DataFrame(user_rows)], ignore_index=True
    )
    user_movie_df = common_movies.pivot_table(
        index=['userId'], columns=['movieId'], values='rating'
    )

    # Restrict the matrix to the movies the new user rated.
    movies_watched = user_movie_df.columns[
        user_movie_df.loc[new_user_id].notna()
    ].tolist()
    movies_watched_df = user_movie_df[movies_watched]

    # Keep users who rated more than 60% of those movies.
    overlap = movies_watched_df.notna().sum(axis=1)
    min_overlap = len(movies_watched) * 60 / 100
    final_df = movies_watched_df[overlap > min_overlap]

    # Correlate every remaining user with the new user directly. This avoids a
    # full user-by-user matrix and cannot lose a pair because two pairs happen
    # to share the same correlation value.
    user_corr = final_df.T.corrwith(final_df.loc[new_user_id]).drop(new_user_id)
    top_users = (
        user_corr[user_corr > 0.65]
        .rename('corr')
        .rename_axis('userId')
        .reset_index()
    )

    top_users_score = top_users.merge(
        ratings[['userId', 'movieId', 'rating']], how='inner'
    )
    top_users_score['weighted_rating'] = (
        top_users_score['corr'] * top_users_score['rating']
    )
    recommendation_df = (
        top_users_score.groupby('movieId')
        .agg({'weighted_rating': 'mean'})
        .reset_index()
    )
    movies_to_be_recommended = recommendation_df[
        recommendation_df['weighted_rating'] > 3.5
    ].sort_values('weighted_rating', ascending=False)
    # NOTE(review): ``movie_df`` is not defined in the visible part of this
    # file; it must exist at module level with a ``movieId`` column - confirm.
    return movies_to_be_recommended.merge(movie_df[['movieId']])['movieId'][:ret_number]
|
116 |
|
117 |
def get_video_address(row):
|
118 |
|
|
|
205 |
"Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiJmMTNkMmNiMTY4MWM3ZWEyMjNiYjgxZTUyYmMzMDUyMCIsInN1YiI6IjY0YTY5MTU1YzNiZmZlMDBjODZiYzRlZSIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.H6zk_gSiudNdZTsgwYHFQnJiVXfR_BX5DfYNIoIsRv8"
|
206 |
}
|
207 |
|
208 |
+
recommender = ContentBasedRecommender('movies_metadata.csv')
|
209 |
+
recommender.content_based_recommendation()
|
210 |
def recommend_movie(movie_name, Number_of_Recommendation):
|
211 |
+
movies = pd.read_csv('/kaggle/input/the-movies-dataset/movies_metadata.csv')
|
212 |
+
|
213 |
Number_of_Recommendation = int(Number_of_Recommendation)
|
214 |
+
names = recommends = recommender.final_recommends(movies= movie_name['Name'], result_number=Number_of_Recommendation)
|
215 |
+
outputIds = []
|
216 |
+
for i in range(len(names)):
|
217 |
+
# print(movies[movies['title'] == names[i]]['imdb_id'].iloc[0])
|
218 |
+
outputIds.append(movies[movies['title'] == names[i]]['imdb_id'].iloc[0])
|
219 |
+
# outputIds = ['tt0114709', 'tt0113497', 'tt0113228', 'tt0114885', 'tt0113041']
|
220 |
html = '''
|
221 |
<html>
|
222 |
<head>
|
|
|
238 |
video_response = requests.get(url, headers=headers)
|
239 |
video = video_response.json()
|
240 |
video = get_video_address(video['results'])
|
241 |
+
if type(video) == bool:
|
242 |
+
video = []
|
243 |
+
for dictionary in video['results']:
|
244 |
+
video.append(dictionary['key'])
|
245 |
+
if len(video) == 0:
|
246 |
+
video = ['']
|
247 |
url = "https://api.themoviedb.org/3/movie/{id}/images".format(id = outputIds[i])
|
248 |
image_response = requests.get(url, headers=headers)
|
249 |
image = image_response.json()
|
250 |
+
if len(image['backdrops']) == 0:
|
251 |
+
image['backdrops'] = [{'file_path':''}]
|
252 |
html +='''
|
253 |
<div class="bdy">
|
254 |
<div class = "top">
|
|
|
268 |
<div class="contents">
|
269 |
<img src='https://image.tmdb.org/t/p/w500'''+image['backdrops'][0]['file_path']+'''' class="img">
|
270 |
<iframe
|
271 |
+
src="https://www.youtube.com/embed/'''+video[0]+'''" class="video" height = 200px>
|
272 |
</iframe>
|
273 |
</div>
|
274 |
<h3>
|
275 |
'''+ data['overview']+'''
|
276 |
+
|
277 |
+
|
278 |
</h3>
|
279 |
</div>
|
280 |
</div>
|
|
|
289 |
|
290 |
html = ''
|
291 |
|
292 |
+
|
293 |
iface = gr.Interface(fn=recommend_movie,
|
294 |
inputs=[gr.Dataframe(headers=["Name", "Rate"],
|
295 |
datatype=["str", "number"],
|
|
|
297 |
col_count=(2, "fixed")),
|
298 |
"number"
|
299 |
],
|
300 |
+
|
301 |
outputs=gr.HTML(html),
|
302 |
title="Movie Recommender",
|
303 |
description="Enter a movie name and your rating (out of 10) for the movie. you must enter at least 3 movies and all words must start with capital letters, example : Grumpier Old Men",
|
|
|
306 |
)
|
307 |
|
308 |
iface.launch()
|
309 |
+
|