# mr-robber's picture
# Update app.py
# 244fa3b
import gradio as gr
from bs4 import BeautifulSoup
import requests
import re
import pandas as pd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats
import gradio as gr
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from datasketch import MinHashLSHForest, MinHash
import pandas as pd
import random
class ContentBasedRecommender:
    """Content-based movie recommender over movie overviews.

    Builds a MinHash signature from the non-zero TF-IDF terms of each movie's
    overview and indexes the signatures in an LSH forest for fast approximate
    nearest-neighbour lookup by title.
    """

    def __init__(self, movies_metadata_path):
        # Path to the metadata CSV; must contain 'title' and 'overview' columns.
        self.movies_metadata_path = movies_metadata_path
        self.indices = None    # title -> row index (built in content_based_recommendation)
        self.df2 = None        # loaded metadata DataFrame
        self.forest = None     # MinHashLSHForest over all overviews
        self.minhashes = None  # list of (doc_id, MinHash) pairs, aligned with df2 rows

    def get_recommendations(self, title, top_k):
        """Return up to `top_k` titles similar to `title`, excluding itself.

        Raises KeyError if `title` is not present in the index.
        """
        idx = self.indices[title]
        query_minhash = self.minhashes[idx][1]
        nearest_neighbors = self.forest.query(query_minhash, top_k)
        # Drop the query movie itself, keep at most top_k neighbours.
        movie_indices = [int(nn) for nn in nearest_neighbors if nn != idx][:top_k]
        return self.df2['title'].iloc[movie_indices]

    def final_recommends(self, movies, result_number):
        """Pool recommendations for several seed titles and sample from them.

        Collects the neighbours of every seed movie, shuffles the pool, and
        returns at most `result_number` titles.
        """
        res = []
        for movie in movies:
            recommendations = self.get_recommendations(movie, top_k=10)
            # Iterate over what was actually returned: the original hard-coded
            # `for j in range(9)` and raised IndexError whenever the forest
            # yielded fewer than 9 neighbours.
            res.extend(recommendations.tolist())
        random.shuffle(res)
        return res[:result_number]

    def content_based_recommendation(self):
        """Load the metadata CSV and build the MinHash LSH index."""
        self.df2 = pd.read_csv(self.movies_metadata_path)
        tfidf = TfidfVectorizer(stop_words='english')
        self.df2['overview'] = self.df2['overview'].fillna('')
        tfidf_matrix = tfidf.fit_transform(self.df2['overview'])
        self.minhashes = []
        for i in range(tfidf_matrix.shape[0]):
            vector = tfidf_matrix[i]
            doc_id = self.df2.index[i]
            minhash = MinHash(num_perm=128)
            # Hash the column indices of the non-zero TF-IDF terms, i.e. the
            # overview's token set (weights themselves are discarded).
            for token in vector.nonzero()[1]:
                minhash.update(str(token).encode('utf-8'))
            self.minhashes.append((doc_id, minhash))
        self.forest = MinHashLSHForest(num_perm=128)
        for doc_id, minhash in self.minhashes:
            self.forest.add(doc_id, minhash)
        self.forest.index()
        # Title -> positional index; keep the first occurrence of duplicate titles.
        self.indices = pd.Series(self.df2.index, index=self.df2['title']).drop_duplicates()
def colabrative(user_movies, ret_number,
                ratings_path='/kaggle/input/the-movies-dataset/ratings.csv'):
    """User-based collaborative filtering for a new user.

    Parameters:
        user_movies: iterable of (movieId, rating) pairs rated by the new user.
        ret_number: maximum number of movie ids to return.
        ratings_path: CSV with userId/movieId/rating columns; the default keeps
            the original hard-coded Kaggle location.

    Returns the recommended movieIds (best first), or [] when the new user
    rated fewer than 3 movies that are common in the data set.
    """
    user_df = {'userId': [],
               'movieId': [],
               'rating': []
               }
    ratings = pd.read_csv(ratings_path)
    # Ratings arrive in 0.5 steps up to 5.0; scale to integers 1-10.
    ratings['rating'] = (ratings['rating'] * 2).astype(int)
    # Movies with <= 1000 ratings are considered too rare to correlate on.
    # Work on the Series directly: wrapping value_counts() in a DataFrame and
    # indexing by "movieId" broke in pandas 2.x (the column is named "count").
    comment_counts = ratings["movieId"].value_counts()
    rare_movies = comment_counts[comment_counts <= 1000].index
    common_movies = ratings[~ratings["movieId"].isin(rare_movies)]
    common_ids = set(common_movies["movieId"])
    counter = 0
    for user_movie in user_movies:
        # Test membership against the movie ids; the original tested the
        # Series itself, which checks the *index* labels, not the values.
        if user_movie[0] in common_ids:
            user_df['userId'].append(300000)  # synthetic id for the new user
            user_df['movieId'].append(user_movie[0])
            user_df['rating'].append(user_movie[1])
            counter += 1
    if counter < 3:
        # Need at least 3 known movies; the original `<= 3` rejected exactly 3,
        # contradicting the UI text ("you must enter at least 3 movies").
        return []
    # DataFrame.append was removed in pandas 2.0 and the original discarded
    # its return value anyway; concatenate the new user's rows explicitly.
    ratings = pd.concat([ratings, pd.DataFrame(user_df)], ignore_index=True)
    comment_counts = ratings["movieId"].value_counts()
    rare_movies = comment_counts[comment_counts <= 1000].index
    common_movies = ratings[~ratings["movieId"].isin(rare_movies)]
    user_movie_df = common_movies.pivot_table(index=["userId"], columns=["movieId"], values='rating')
    random_user = 300000
    random_user_df = user_movie_df[user_movie_df.index == random_user]
    movies_watched = random_user_df.columns[random_user_df.notna().any()].tolist()
    movies_watched_df = user_movie_df[movies_watched]
    user_movie_count = movies_watched_df.T.notnull().sum()
    user_movie_count = user_movie_count.reset_index()
    user_movie_count.columns = ["userid", "movie_count"]
    # Keep users who share more than 60% of the new user's watched movies.
    perc = len(movies_watched) * 60 / 100
    user_same_movies = user_movie_count[user_movie_count["movie_count"] > perc]["userid"]
    final_df = movies_watched_df[movies_watched_df.index.isin(user_same_movies)]
    corr_df = final_df.T.corr().unstack().sort_values().drop_duplicates()
    corr_df = pd.DataFrame(corr_df, columns=["corr"])
    corr_df.index.names = ["userid_1", "userid_2"]
    corr_df = corr_df.reset_index()
    # Users correlating > 0.65 with the new user drive the recommendation.
    top_users = corr_df[(corr_df["userid_1"] == random_user) & (corr_df["corr"] > 0.65)][["userid_2", "corr"]]
    top_users.columns = ["userId", "corr"]
    top_users_score = top_users.merge(ratings[["userId", "movieId", "rating"]], how="inner")
    top_users_score["weighted_reting"] = top_users_score["corr"] * top_users_score["rating"]
    recommendation_df = top_users_score.groupby("movieId").agg({"weighted_reting": "mean"})
    recommendation_df = recommendation_df.reset_index()
    movies_to_be_recommended = recommendation_df[recommendation_df["weighted_reting"] > 3.5].sort_values("weighted_reting", ascending=False)
    # The original merged with an undefined name `movie_df` (NameError at
    # runtime); return the recommended ids directly instead.
    return movies_to_be_recommended["movieId"][:ret_number]
def get_video_address(row):
    """Extract the YouTube keys of 'Trailer' entries from a TMDB videos list.

    `row` is the 'results' list of dicts from the TMDB /videos endpoint.
    Returns the 'key' values of entries whose 'type' is 'Trailer' (possibly
    an empty list).
    """
    # Comprehension instead of the manual append loop; the original also
    # shadowed the builtin `type` with a local variable.
    return [entry['key'] for entry in row if entry.get('type') == 'Trailer']
# CSS injected into the Gradio interface (see `css=` below); it styles the
# per-movie HTML cards emitted by recommend_movie: the title/runtime/rating
# header row (.top/.topleft/.topright), the poster + trailer row (.contents),
# and the overall card body (.bdy).
css_code='''
.bdy{
color: #eee7d8;
margin: 0 0 60px 0;
font-family: 'Gill Sans', 'Gill Sans MT', Calibri, 'Trebuchet MS', sans-serif;
}
.contents{
display: flex;
align-content: space-around;
justify-content: center;
}
.img {
display: block;
width: calc(34% - 0.125rem);
height:auto;
}
.video{
width: calc(66% - 0.125rem);
height:auto;
margin-left: 0.25rem;
}
.center {
border: 3px solid green;
}
.top{
display: flex;
justify-content: space-between;
margin:0;
}
.inline{
display: flex;
margin:0;
}
.time{
padding: 0 0 0 15px;
}
.topleft{
padding: 0px;
margin: 0px;
line-height: 0;
}
.topright{
line-height: 0;
margin:0;
padding:0;
}
.name{
margin: 20px 0 5px 0;
}
'''
def combine(x):
    """Join the 'name' fields of a list of dicts into a '|'-separated string.

    Returns np.nan for non-list input. The original evaluated `np.nan` without
    a `return` statement, so non-list input silently yielded None.
    """
    if isinstance(x, list):
        return '|'.join(item['name'] for item in x)
    return np.nan
# TMDB API auth headers used by recommend_movie.
# NOTE(review): a bearer token is hard-coded and shipped with the app — it
# should be moved to an environment variable / secret store.
headers = {
    "accept": "application/json",
    "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiJmMTNkMmNiMTY4MWM3ZWEyMjNiYjgxZTUyYmMzMDUyMCIsInN1YiI6IjY0YTY5MTU1YzNiZmZlMDBjODZiYzRlZSIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.H6zk_gSiudNdZTsgwYHFQnJiVXfR_BX5DfYNIoIsRv8"
}
# Build the content-based index once at import time (reads movies_metadata.csv).
recommender = ContentBasedRecommender('movies_metadata.csv')
recommender.content_based_recommendation()
def _fetch_json(url):
    """GET `url` from the TMDB API with the module-level auth headers."""
    return requests.get(url, headers=headers).json()


def _movie_card(data, image_path, video_key):
    """Render one TMDB movie payload as an HTML card (title, runtime, rating,
    backdrop image, embedded trailer, overview)."""
    # The original hard-coded "2022" as every movie's year; use the release
    # date from the API response instead (empty string when absent).
    release_year = (data.get('release_date') or '')[:4]
    return '''
    <div class="bdy">
    <div class = "top">
    <div class="topleft">
    <h1 class="name">''' + data['title'] + '''</h1>
    <div class="inline">
    <h3>''' + release_year + '''</h3>
    <h3 class="time">''' + str(data['runtime'] // 60) + 'h ' + str(data['runtime'] % 60) + '''m</h3>
    </div>
    </div>
    <div class="topright">
    <h1>IMDb RATING</h1>
    <h3>⭐''' + str(round(data['vote_average'], 2)) + '''/10 &nbsp &nbsp ''' + str(data['vote_count']) + '''</h3>
    </div>
    </div>
    <div class="contents">
    <img src='https://image.tmdb.org/t/p/w500''' + image_path + '''' class="img">
    <iframe
      src="https://www.youtube.com/embed/''' + video_key + '''" class="video" height = 200px>
    </iframe>
    </div>
    <h3>
    ''' + data['overview'] + '''
    </h3>
    </div>
    </div>
    '''


def recommend_movie(movie_name, Number_of_Recommendation):
    """Gradio callback: return an HTML page of recommended movies.

    Parameters:
        movie_name: DataFrame-like with a 'Name' column of seed movie titles
            (the Gradio Dataframe input).
        Number_of_Recommendation: how many recommendations to render.
    """
    movies = pd.read_csv('movies_metadata.csv')
    Number_of_Recommendation = int(Number_of_Recommendation)
    # The original bound the result to two names (`names = recommends = ...`)
    # and never used the second one.
    names = recommender.final_recommends(movies=movie_name['Name'],
                                         result_number=Number_of_Recommendation)
    # Map each recommended title to its IMDB id (first match per title).
    outputIds = [movies[movies['title'] == name]['imdb_id'].iloc[0] for name in names]
    html = '''
    <html>
    <head>
    <link rel="stylesheet" href="styles.css" />
    </head>
    <body>
    '''
    # Slice defensively: the recommender may return fewer ids than requested,
    # where the original's range() indexing would raise IndexError.
    for movie_id in outputIds[:Number_of_Recommendation]:
        data = _fetch_json(
            "https://api.themoviedb.org/3/movie/{id}?language=en-US".format(id=movie_id))
        video_payload = _fetch_json(
            "https://api.themoviedb.org/3/movie/{id}/videos?language=en-US".format(id=movie_id))
        video = get_video_address(video_payload['results'])
        # get_video_address always returns a list; the original's
        # `if type(video) == bool:` branch could never execute without
        # crashing (`video['results']` on a list) — removed as dead code.
        if len(video) == 0:
            video = ['']  # empty embed key -> blank iframe rather than a crash
        image = _fetch_json(
            "https://api.themoviedb.org/3/movie/{id}/images".format(id=movie_id))
        if len(image['backdrops']) == 0:
            image['backdrops'] = [{'file_path': ''}]
        html += _movie_card(data, image['backdrops'][0]['file_path'], video[0])
    html += '''
    </body>
    </html>
    '''
    return html
# Initial (empty) value for the HTML output component.
html = ''
# Gradio UI: a 3-row table of (movie name, rating) pairs plus a number input,
# rendered back as the HTML page produced by recommend_movie; styled with
# css_code and the 'taithrah/Minimal' theme.
iface = gr.Interface(fn=recommend_movie,
                     inputs=[gr.Dataframe(headers=["Name", "Rate"],
                                          datatype=["str", "number"],
                                          row_count=3,
                                          col_count=(2, "fixed")),
                             "number"
                             ],
                     outputs=gr.HTML(html),
                     title="Movie Recommender",
                     description="Enter a movie name and your rating (out of 10) for the movie. you must enter at least 3 movies and all words must start with capital letters, example : Grumpier Old Men",
                     css = css_code,
                     theme='taithrah/Minimal'
                    )
iface.launch()