# NOTE(review): "Spaces: / Runtime error" page-status text captured during
# extraction was removed here — it was not part of the program.
import random
import re

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import scipy.stats
import seaborn as sns
from bs4 import BeautifulSoup
from datasketch import MinHashLSHForest, MinHash
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
class ContentBasedRecommender:
    """Content-based movie recommender.

    Builds a MinHash LSH forest over the sets of non-zero TF-IDF terms of
    each movie's overview, then answers similar-movie queries by title.
    """

    def __init__(self, movies_metadata_path):
        # Path to the metadata CSV (must contain 'title' and 'overview' columns).
        self.movies_metadata_path = movies_metadata_path
        self.indices = None    # title -> positional index lookup (pd.Series)
        self.df2 = None        # loaded metadata DataFrame
        self.forest = None     # MinHashLSHForest over all overviews
        self.minhashes = None  # list of (doc_id, MinHash) pairs

    def get_recommendations(self, title, top_k):
        """Return up to `top_k` titles similar to `title` (excluding itself)."""
        idx = self.indices[title]
        query_minhash = self.minhashes[idx][1]
        nearest_neighbors = self.forest.query(query_minhash, top_k)
        movie_indices = [int(nn) for nn in nearest_neighbors if nn != idx][:top_k]
        return self.df2['title'].iloc[movie_indices]

    def final_recommends(self, movies, result_number):
        """Pool recommendations for several seed titles.

        Collects up to 9 neighbours per seed, shuffles the pool, and
        returns at most `result_number` titles.
        """
        res = []
        for title in movies:
            recommendations = self.get_recommendations(title, top_k=10)
            # BUG FIX: the original iterated a fixed range(9), which raised
            # IndexError whenever fewer than 9 neighbours were returned.
            res.extend(recommendations.iloc[:9].tolist())
        random.shuffle(res)
        return res[:result_number]

    def content_based_recommendation(self):
        """Load the metadata CSV and build the MinHash LSH index."""
        self.df2 = pd.read_csv(self.movies_metadata_path)
        tfidf = TfidfVectorizer(stop_words='english')
        self.df2['overview'] = self.df2['overview'].fillna('')
        tfidf_matrix = tfidf.fit_transform(self.df2['overview'])
        self.minhashes = []
        for i in range(tfidf_matrix.shape[0]):
            vector = tfidf_matrix[i]
            doc_id = self.df2.index[i]
            minhash = MinHash(num_perm=128)
            # Hash the column indices of non-zero TF-IDF terms (the token set).
            for token in vector.nonzero()[1]:
                minhash.update(str(token).encode('utf-8'))
            self.minhashes.append((doc_id, minhash))
        self.forest = MinHashLSHForest(num_perm=128)
        for doc_id, minhash in self.minhashes:
            self.forest.add(doc_id, minhash)
        self.forest.index()
        # Map title -> row index for query-time lookups.
        self.indices = pd.Series(self.df2.index, index=self.df2['title']).drop_duplicates()
def colabrative(user_movies, ret_number):
    """User-based collaborative filtering for an injected synthetic user.

    Parameters
    ----------
    user_movies : iterable of (movieId, rating) pairs
        The user's rated movies (rating on the UI's 0-10 scale).
    ret_number : int
        Maximum number of movieIds to return.

    Returns
    -------
    list or pandas.Series
        [] when fewer than the required number of the given movies are
        known; otherwise up to `ret_number` recommended movieIds.
    """
    user_df = {'userId': [],
               'movieId': [],
               'rating': []
               }
    ratings = pd.read_csv('/kaggle/input/the-movies-dataset/ratings.csv')
    # Scale the 0-5 star ratings to the 0-10 scale used by the UI input.
    ratings['rating'] = ratings['rating'] * 2
    ratings['rating'] = ratings['rating'].astype(int)
    # Drop movies with too few ratings to correlate on.
    comment_counts = pd.DataFrame(ratings["movieId"].value_counts())
    rare_movies = comment_counts[comment_counts["movieId"] <= 1000].index
    common_movies = ratings[~ratings["movieId"].isin(rare_movies)]
    # BUG FIX: `x in series` tests the *index*, not the values; build a set
    # of known ids so membership checks the movieId values.
    known_ids = set(common_movies["movieId"])
    counter = 0
    for user_movie in user_movies:
        if user_movie[0] in known_ids:
            user_df['userId'].append(300000)
            user_df['movieId'].append(user_movie[0])
            user_df['rating'].append(user_movie[1])
            counter += 1
    # BUG FIX: the UI requires "at least 3 movies", but `counter <= 3`
    # rejected exactly 3; use a strict less-than.
    if counter < 3:
        return []
    # BUG FIX: DataFrame.append returned a new frame (and was removed in
    # pandas 2.x); the original call discarded the new user's rows entirely.
    ratings = pd.concat([ratings, pd.DataFrame(user_df)], ignore_index=True)
    comment_counts = pd.DataFrame(ratings["movieId"].value_counts())
    rare_movies = comment_counts[comment_counts["movieId"] <= 1000].index
    common_movies = ratings[~ratings["movieId"].isin(rare_movies)]
    user_movie_df = common_movies.pivot_table(index=["userId"], columns=["movieId"], values='rating')
    random_user = 300000
    random_user_df = user_movie_df[user_movie_df.index == random_user]
    movies_watched = random_user_df.columns[random_user_df.notna().any()].tolist()
    movies_watched_df = user_movie_df[movies_watched]
    user_movie_count = movies_watched_df.T.notnull().sum()
    user_movie_count = user_movie_count.reset_index()
    user_movie_count.columns = ["userid", "movie_count"]
    # Keep only users who rated more than 60% of the same movies.
    perc = len(movies_watched) * 60 / 100
    user_same_movies = user_movie_count[user_movie_count["movie_count"] > perc]["userid"]
    final_df = movies_watched_df[movies_watched_df.index.isin(user_same_movies)]
    corr_df = final_df.T.corr().unstack().sort_values().drop_duplicates()
    corr_df = pd.DataFrame(corr_df, columns=["corr"])
    corr_df.index.names = ["userid_1", "userid_2"]
    corr_df = corr_df.reset_index()
    # Users whose rating pattern correlates strongly with the injected user.
    top_users = corr_df[(corr_df["userid_1"] == random_user) & (corr_df["corr"] > 0.65)][["userid_2", "corr"]]
    top_users.columns = ["userId", "corr"]
    top_users_score = top_users.merge(ratings[["userId", "movieId", "rating"]], how="inner")
    top_users_score["weighted_reting"] = top_users_score["corr"] * top_users_score["rating"]
    recommendation_df = top_users_score.groupby("movieId").agg({"weighted_reting": "mean"})
    recommendation_df = recommendation_df.reset_index()
    # NOTE(review): the 3.5 cut-off looks calibrated to a 0-5 scale even
    # though ratings were doubled above — confirm the intended threshold.
    movies_to_be_recommended = recommendation_df[recommendation_df["weighted_reting"] > 3.5].sort_values("weighted_reting", ascending=False)
    # BUG FIX: the original merged with an undefined `movie_df` (NameError);
    # the merge only re-filtered ids already present, so return them directly.
    return movies_to_be_recommended["movieId"][:ret_number]
def get_video_address(row):
    """Extract the YouTube keys of all 'Trailer' entries.

    `row` is a TMDB /movie/{id}/videos 'results' list: dicts with a 'key'
    and, optionally, a 'type' field. Always returns a (possibly empty) list.
    """
    # Comprehension replaces the manual append loop; also stops shadowing
    # the builtin `type`, which the original used as a variable name.
    return [entry['key'] for entry in row if entry.get('type') == 'Trailer']
# Stylesheet injected into the Gradio app (per-movie card layout).
css_code = '''
.bdy{
    color: #eee7d8;
    margin: 0 0 60px 0;
    font-family: 'Gill Sans', 'Gill Sans MT', Calibri, 'Trebuchet MS', sans-serif;
}
.contents{
    display: flex;
    align-content: space-around;
    justify-content: center;
}
.img {
    display: block;
    width: calc(34% - 0.125rem);
    height:auto;
}
.video{
    width: calc(66% - 0.125rem);
    height:auto;
    margin-left: 0.25rem;
}
.center {
    border: 3px solid green;
}
.top{
    display: flex;
    justify-content: space-between;
    margin:0;
}
.inline{
    display: flex;
    margin:0;
}
.time{
    padding: 0 0 0 15px;
}
.topleft{
    padding: 0px;
    margin: 0px;
    line-height: 0;
}
.topright{
    line-height: 0;
    margin:0;
    padding:0;
}
.name{
    margin: 20px 0 5px 0;
}
'''
def combine(x):
    """Join the 'name' fields of a list of dicts with '|'.

    Returns NaN for non-list input (e.g. a missing metadata cell).
    """
    if isinstance(x, list):
        return '|'.join(d['name'] for d in x)
    # BUG FIX: the original evaluated `np.nan` without returning it, so
    # non-list input silently returned None instead of NaN.
    return np.nan
# NOTE(review): hard-coded TMDB API bearer token committed to source —
# this should be loaded from an environment variable / secret store.
headers = {
    "accept": "application/json",
    "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiJmMTNkMmNiMTY4MWM3ZWEyMjNiYjgxZTUyYmMzMDUyMCIsInN1YiI6IjY0YTY5MTU1YzNiZmZlMDBjODZiYzRlZSIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.H6zk_gSiudNdZTsgwYHFQnJiVXfR_BX5DfYNIoIsRv8"
}

# Build the content-based index once at startup (reads movies_metadata.csv).
recommender = ContentBasedRecommender('movies_metadata.csv')
recommender.content_based_recommendation()
def recommend_movie(movie_name, Number_of_Recommendation):
    """Gradio handler: render recommended movies as an HTML page.

    Parameters
    ----------
    movie_name : pandas.DataFrame
        The user's seed movies; must have a 'Name' column (the 'Rate'
        column supplied by the UI is unused here).
    Number_of_Recommendation : number
        How many recommendations to fetch and render.

    Returns
    -------
    str
        A full HTML document with title, rating, backdrop, trailer embed
        and overview for each recommended movie.
    """
    movies = pd.read_csv('movies_metadata.csv')
    Number_of_Recommendation = int(Number_of_Recommendation)
    names = recommender.final_recommends(movies=movie_name['Name'],
                                         result_number=Number_of_Recommendation)
    # Map each recommended title back to its IMDb id (the TMDB lookup key).
    outputIds = []
    for name in names:
        outputIds.append(movies[movies['title'] == name]['imdb_id'].iloc[0])
    html = '''
    <html>
    <head>
    <link rel="stylesheet" href="styles.css" />
    </head>
    <body>
    '''
    # NOTE(review): hard-coded bearer token (duplicated at module level) —
    # should come from a secret store.
    headers = {
        "accept": "application/json",
        "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiJmMTNkMmNiMTY4MWM3ZWEyMjNiYjgxZTUyYmMzMDUyMCIsInN1YiI6IjY0YTY5MTU1YzNiZmZlMDBjODZiYzRlZSIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.H6zk_gSiudNdZTsgwYHFQnJiVXfR_BX5DfYNIoIsRv8"
    }
    for i in range(Number_of_Recommendation):
        # Basic details (title, runtime, vote average/count, overview).
        url = "https://api.themoviedb.org/3/movie/{id}?language=en-US".format(id=outputIds[i])
        data = requests.get(url, headers=headers).json()
        # Trailer keys. get_video_address always returns a list, so the
        # original unreachable `type(video) == bool` branch was removed
        # (it would itself have crashed on `video['results']`).
        url = "https://api.themoviedb.org/3/movie/{id}/videos?language=en-US".format(id=outputIds[i])
        video = requests.get(url, headers=headers).json()
        video = get_video_address(video['results'])
        if len(video) == 0:
            video = ['']  # empty embed when no trailer exists
        # Backdrop image; fall back to an empty path when none is available.
        url = "https://api.themoviedb.org/3/movie/{id}/images".format(id=outputIds[i])
        image = requests.get(url, headers=headers).json()
        if len(image['backdrops']) == 0:
            image['backdrops'] = [{'file_path': ''}]
        # NOTE(review): the year below is hard-coded to 2022 — consider
        # rendering the movie's actual release year instead.
        html += '''
        <div class="bdy">
            <div class = "top">
                <div class="topleft">
                    <h1 class="name">''' + data['title'] + '''</h1>
                    <div class="inline">
                        <h3>2022</h3>
                        <h3 class="time">''' + str(data['runtime'] // 60) + 'h ' + str(data['runtime'] % 60) + '''m</h3>
                    </div>
                </div>
                <div class="topright">
                    <h1>IMDb RATING</h1>
                    <h3>⭐''' + str(round(data['vote_average'], 2)) + '''/10     ''' + str(data['vote_count']) + '''</h3>
                </div>
            </div>
            <div class="contents">
                <img src='https://image.tmdb.org/t/p/w500''' + image['backdrops'][0]['file_path'] + '''' class="img">
                <iframe
                    src="https://www.youtube.com/embed/''' + video[0] + '''" class="video" height = 200px>
                </iframe>
            </div>
            <h3>
            ''' + data['overview'] + '''
            </h3>
        </div>
        </div>
        '''
    html += '''
    </body>
    </html>
    '''
    return html
# Initial (empty) value for the HTML output component.
html = ''

# UI: a 3-row dataframe of (movie name, rating) seeds plus a count input;
# output is the rendered HTML page from recommend_movie.
iface = gr.Interface(fn=recommend_movie,
                     inputs=[gr.Dataframe(headers=["Name", "Rate"],
                                          datatype=["str", "number"],
                                          row_count=3,
                                          col_count=(2, "fixed")),
                             "number"
                             ],
                     outputs=gr.HTML(html),
                     title="Movie Recommender",
                     description="Enter a movie name and your rating (out of 10) for the movie. you must enter at least 3 movies and all words must start with capital letters, example : Grumpier Old Men",
                     css=css_code,
                     theme='taithrah/Minimal'
                     )
iface.launch()