Spaces:

mr-robber
/

IMDB_Movie_Recommender

Runtime error

App Files Files Community

mr-robber committed on Jul 7, 2023

Commit

bc6208b

•

1 Parent(s): 2c50f05

Update app.py

Browse files

Files changed (1) hide show

app.py +131 -5

app.py CHANGED Viewed

@@ -3,10 +3,116 @@ from bs4 import BeautifulSoup
 import requests
 import re
 import pandas as pd
 import gradio as gr
 def get_video_address(row):
@@ -99,10 +205,18 @@ headers = {
 "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiJmMTNkMmNiMTY4MWM3ZWEyMjNiYjgxZTUyYmMzMDUyMCIsInN1YiI6IjY0YTY5MTU1YzNiZmZlMDBjODZiYzRlZSIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.H6zk_gSiudNdZTsgwYHFQnJiVXfR_BX5DfYNIoIsRv8"
 }
 def recommend_movie(movie_name, Number_of_Recommendation):
     Number_of_Recommendation = int(Number_of_Recommendation)
-    outputIds = ['tt0114709', 'tt0113497', 'tt0113228', 'tt0114885', 'tt0113041']
     html = '''
       <html>
     <head>
@@ -124,10 +238,17 @@ def recommend_movie(movie_name, Number_of_Recommendation):
         video_response = requests.get(url, headers=headers)
         video = video_response.json()
         video = get_video_address(video['results'])
         url = "https://api.themoviedb.org/3/movie/{id}/images".format(id = outputIds[i])
         image_response = requests.get(url, headers=headers)
         image = image_response.json()
         html +='''
         <div class="bdy">
             <div class = "top">
@@ -147,11 +268,13 @@ def recommend_movie(movie_name, Number_of_Recommendation):
             <div class="contents">
                 <img src='https://image.tmdb.org/t/p/w500'''+image['backdrops'][0]['file_path']+''''  class="img">
                     <iframe
-                src="https://www.youtube.com/embed/'''+video[0]+'''" class="video">
                 </iframe>
             </div>
                 <h3>
                     '''+ data['overview']+'''
                 </h3>
             </div>
         </div>
@@ -166,6 +289,7 @@ def recommend_movie(movie_name, Number_of_Recommendation):
 html = ''
 iface = gr.Interface(fn=recommend_movie,
                      inputs=[gr.Dataframe(headers=["Name", "Rate"],
                                         datatype=["str", "number"],
@@ -173,6 +297,7 @@ iface = gr.Interface(fn=recommend_movie,
                                         col_count=(2, "fixed")),
                             "number"
                             ],
                      outputs=gr.HTML(html),
                      title="Movie Recommender",
                      description="Enter a movie name and your rating (out of 10) for the movie. you must enter at least 3 movies and all words must start with capital letters, example : Grumpier Old Men",
@@ -181,3 +306,4 @@ iface = gr.Interface(fn=recommend_movie,
                     )
 iface.launch()

 import requests
 import re
 import pandas as pd
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import scipy.stats
 import gradio as gr
+    from sklearn.metrics.pairwise import cosine_similarity
+from sklearn.feature_extraction.text import TfidfVectorizer
+from datasketch import MinHashLSHForest, MinHash
+import pandas as pd
+import random
class ContentBasedRecommender:
    """Recommend movies whose overviews share vocabulary with a given title.

    Call :meth:`content_based_recommendation` once to load the metadata CSV
    and build the MinHash index; afterwards :meth:`get_recommendations` and
    :meth:`final_recommends` can be queried.
    """

    def __init__(self, movies_metadata_path):
        """Store the metadata CSV path; nothing is loaded until the index is built.

        Args:
            movies_metadata_path: Path of a CSV that has at least ``title``
                and ``overview`` columns.
        """
        self.movies_metadata_path = movies_metadata_path
        self.indices = None    # Series: title -> row label of df2
        self.df2 = None        # metadata DataFrame
        self.forest = None     # MinHashLSHForest over all overviews
        self.minhashes = None  # list of (row label, MinHash), in row order

    def get_recommendations(self, title, top_k):
        """Return up to ``top_k`` titles that are nearest to ``title``.

        Args:
            title: Exact title, as spelled in the metadata CSV.
            top_k: Maximum number of neighbours to return.

        Returns:
            A ``pandas.Series`` of titles (possibly shorter than ``top_k``).

        Raises:
            KeyError: If ``title`` is not present in the index.
        """
        idx = self.indices[title]
        # NOTE: row labels are used as list/iloc positions here and below,
        # which assumes df2 keeps the default 0..n-1 index from read_csv.
        query_minhash = self.minhashes[idx][1]
        # Ask for one extra neighbour: the query movie is itself in the
        # forest and is filtered out below, which would otherwise leave at
        # most top_k - 1 results.
        nearest_neighbors = self.forest.query(query_minhash, top_k + 1)
        movie_indices = [int(nn) for nn in nearest_neighbors if nn != idx][:top_k]
        return self.df2['title'].iloc[movie_indices]

    def final_recommends(self, movies, result_number):
        """Pool neighbours of every input title and return a random sample.

        Args:
            movies: Iterable of titles (a list or a ``pandas.Series``).
            result_number: Maximum number of titles to return.

        Returns:
            A list of at most ``result_number`` titles in random order.
            Titles missing from the index are skipped.
        """
        res = []
        for title in movies:
            # Blank or misspelled rows must not abort the whole request.
            if title not in self.indices:
                continue
            recommendations = self.get_recommendations(title, top_k=10)
            # Take at most nine per title; the forest may return fewer.
            res.extend(recommendations.iloc[:9].tolist())
        random.shuffle(res)
        return res[:result_number]

    def content_based_recommendation(self):
        """Load the metadata CSV and build the MinHash LSH forest.

        Each overview is reduced to the set of vocabulary ids that the
        TF-IDF vectorizer assigns a non-zero weight; only the ids are hashed,
        the weights themselves are not used.
        """
        self.df2 = pd.read_csv(self.movies_metadata_path)
        self.df2['overview'] = self.df2['overview'].fillna('')
        tfidf = TfidfVectorizer(stop_words='english')
        tfidf_matrix = tfidf.fit_transform(self.df2['overview'])

        self.minhashes = []
        for i in range(tfidf_matrix.shape[0]):
            doc_id = self.df2.index[i]
            minhash = MinHash(num_perm=128)
            for token in tfidf_matrix[i].nonzero()[1]:
                minhash.update(str(token).encode('utf-8'))
            self.minhashes.append((doc_id, minhash))

        self.forest = MinHashLSHForest(num_perm=128)
        for doc_id, minhash in self.minhashes:
            self.forest.add(doc_id, minhash)
        self.forest.index()

        indices = pd.Series(self.df2.index, index=self.df2['title'])
        # Keep the first row for each title. Series.drop_duplicates() would
        # deduplicate the (already unique) row labels and leave repeated
        # titles in place, making indices[title] return a Series.
        self.indices = indices[~indices.index.duplicated(keep='first')]

    @staticmethod
    def colabrative(user_movies, ret_number,
                    ratings_path='/kaggle/input/the-movies-dataset/ratings.csv'):
        """User-based collaborative filtering over the ratings CSV.

        The function uses no instance state, so it is exposed as a static
        method.

        Args:
            user_movies: Iterable of ``(movieId, rating)`` pairs for the
                current user; ratings presumably on the 10-point scale the
                stored ratings are converted to (confirm against caller).
            ret_number: Maximum number of movie ids to return.
            ratings_path: CSV with ``userId``, ``movieId`` and ``rating``
                columns.

        Returns:
            ``[]`` when three or fewer of the user's movies are among the
            frequently rated ones; otherwise a ``pandas.Series`` of movie
            ids ordered by descending weighted rating.
        """
        target_user = 300000  # synthetic id given to the current user

        ratings = pd.read_csv(ratings_path)
        # Double the stored ratings and truncate to integers.
        ratings['rating'] = (ratings['rating'] * 2).astype(int)

        # Only movies with more than 1000 ratings take part.
        rating_counts = ratings['movieId'].value_counts()
        common_ids = set(rating_counts[rating_counts > 1000].index)

        user_rows = [
            {'userId': target_user, 'movieId': movie[0], 'rating': movie[1]}
            for movie in user_movies
            if movie[0] in common_ids
        ]
        # NOTE(review): the UI text asks for "at least 3 movies" but this
        # threshold requires four usable ones; kept as written - confirm.
        if len(user_rows) <= 3:
            return []

        ratings = pd.concat([ratings, pd.DataFrame(user_rows)],
                            ignore_index=True)
        # The user's rows only touch movies that are already common, so the
        # set of common movies is unchanged by the concat.
        common_movies = ratings[ratings['movieId'].isin(common_ids)]
        user_movie_df = common_movies.pivot_table(
            index='userId', columns='movieId', values='rating')

        movies_watched = user_movie_df.loc[target_user].dropna().index.tolist()
        movies_watched_df = user_movie_df[movies_watched]

        # Keep users who rated more than 60% of the target user's movies.
        overlap = movies_watched_df.notna().sum(axis=1)
        min_overlap = len(movies_watched) * 60 / 100
        final_df = movies_watched_df.loc[overlap[overlap > min_overlap].index]

        # Correlate every remaining user with the target user directly.
        similarity = final_df.T.corrwith(final_df.loc[target_user])
        similarity = similarity.drop(target_user, errors='ignore')
        top_users = (similarity[similarity > 0.65]
                     .rename_axis('userId')
                     .reset_index(name='corr'))

        top_users_score = top_users.merge(
            ratings[['userId', 'movieId', 'rating']], how='inner', on='userId')
        top_users_score['weighted_rating'] = (
            top_users_score['corr'] * top_users_score['rating'])
        recommendation_df = top_users_score.groupby(
            'movieId', as_index=False)['weighted_rating'].mean()

        recommended = recommendation_df[
            recommendation_df['weighted_rating'] > 3.5
        ].sort_values('weighted_rating', ascending=False)
        # The original merged against `movie_df`, a name not defined in the
        # visible source; the ids are returned directly instead.
        return recommended['movieId'].iloc[:ret_number].reset_index(drop=True)
 def get_video_address(row):
 "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiJmMTNkMmNiMTY4MWM3ZWEyMjNiYjgxZTUyYmMzMDUyMCIsInN1YiI6IjY0YTY5MTU1YzNiZmZlMDBjODZiYzRlZSIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.H6zk_gSiudNdZTsgwYHFQnJiVXfR_BX5DfYNIoIsRv8"
 }
+recommender = ContentBasedRecommender('movies_metadata.csv')
+recommender.content_based_recommendation()
 def recommend_movie(movie_name, Number_of_Recommendation):
+    movies = pd.read_csv('/kaggle/input/the-movies-dataset/movies_metadata.csv')
     Number_of_Recommendation = int(Number_of_Recommendation)
+    names = recommends = recommender.final_recommends(movies= movie_name['Name'], result_number=Number_of_Recommendation)
+    outputIds = []
+    for i in range(len(names)):
+#         print(movies[movies['title'] == names[i]]['imdb_id'].iloc[0])
+        outputIds.append(movies[movies['title'] == names[i]]['imdb_id'].iloc[0])
+#     outputIds = ['tt0114709', 'tt0113497', 'tt0113228', 'tt0114885', 'tt0113041']
     html = '''
       <html>
     <head>
         video_response = requests.get(url, headers=headers)
         video = video_response.json()
         video = get_video_address(video['results'])
+        if type(video) == bool:
+            video = []
+            for dictionary in video['results']:
+                video.append(dictionary['key'])
+        if len(video) == 0:
+            video = ['']
         url = "https://api.themoviedb.org/3/movie/{id}/images".format(id = outputIds[i])
         image_response = requests.get(url, headers=headers)
         image = image_response.json()
+        if len(image['backdrops']) == 0:
+            image['backdrops'] = [{'file_path':''}]
         html +='''
         <div class="bdy">
             <div class = "top">
             <div class="contents">
                 <img src='https://image.tmdb.org/t/p/w500'''+image['backdrops'][0]['file_path']+''''  class="img">
                     <iframe
+                src="https://www.youtube.com/embed/'''+video[0]+'''" class="video" height = 200px>
                 </iframe>
             </div>
                 <h3>
                     '''+ data['overview']+'''
                 </h3>
             </div>
         </div>
 html = ''
 iface = gr.Interface(fn=recommend_movie,
                      inputs=[gr.Dataframe(headers=["Name", "Rate"],
                                         datatype=["str", "number"],
                                         col_count=(2, "fixed")),
                             "number"
                             ],
                      outputs=gr.HTML(html),
                      title="Movie Recommender",
                      description="Enter a movie name and your rating (out of 10) for the movie. you must enter at least 3 movies and all words must start with capital letters, example : Grumpier Old Men",
                     )
 iface.launch()