Spaces:

MohamedMotaz
/

Movie-Recommendation

Running

App Files Files Community

MohamedMotaz committed 14 days ago

Commit

9061c90

•

1 Parent(s): 974afd4

edit app.py

Browse files

Files changed (1) hide show

app.py +108 -35

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import pandas as pd
 import pickle
 import gdown
 import os
 # Set page configuration
@@ -125,15 +125,22 @@ st.markdown(
 # CSV files URLs as raw data from GitHub repository
 moviesCSV = "Data/movies.csv"
 ratingsCSV = "Data/ratings.csv"
 linksCSV = "Data/links.csv"
 # the following code downloads the similarity matrix from Google Drive if it does not exist locally
 file_url = 'https://drive.google.com/uc?id=1-1bpusE96_Hh0rUxU7YmBo6RiwYLQGVy'
 output_path = 'Models/similarity_matrix.pkl'
 @st.cache_data
 def download_model_from_google_drive(file_url, output_path):
@@ -144,41 +151,87 @@ def download_model_from_google_drive(file_url, output_path):
 if not os.path.exists(output_path):
     print("Downloading the similarity matrix from Googlr Drive...")
     # change file permission
-    os.chmod('Models/', 0o777)
     download_model_from_google_drive(file_url, output_path)
     print("Download completed......")
 # Dummy data for user recommendations
 user_recommendations = {
-    "1": ["Inception", "The Matrix", "Interstellar"],
-    "2": ["The Amazing Spider-Man", "District 9", "Titanic"]
 }
 # Function to hash passwords
 def hash_password(password):
-    return password
 # Dummy user database
 user_db = {
-    "1": hash_password("password123"),
-    "2": hash_password("mypassword")
 }
 # Login function
-def login(email, password):
-    if email in user_db:
         return True
     return False
 # Function to fetch movie details from OMDb API
-def fetch_movie_details(title, api_key="23f109b2"):
-    url = f"http://www.omdbapi.com/?t={title}&apikey={api_key}"
-    response = requests.get(url)
-    return response.json()
 # Display movie details
 def display_movie_details(movie):
     if movie['Response'] == 'False':
         st.write(f"Movie not found: {movie['Error']}")
@@ -187,13 +240,18 @@ def display_movie_details(movie):
         movie['imdbRating'] = 0
     imdb_rating = float(movie['imdbRating'])
     url = f"https://www.imdb.com/title/{movie['imdbID']}/"
     st.markdown(
         f"""
         <div style="
             background-color: #313131;
-            border-radius: 15px;
-            padding: 10px;
-            margin: 10px 0;
             box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1);
         ">
             <div style="display: flex;">
@@ -204,12 +262,12 @@ def display_movie_details(movie):
                     </a>
                 </div>
                 <div style="flex: 3; padding-left: 20px;">
-                    <h2 style="margin: 0;" anchor="{url}">{movie['Title']}</h2>
                     <p style="color: gray;">
                         <b>Year:</b> {movie['Year']} Rated: {movie['Rated']} <br>
-                        <b>Genre:</b> {movie['Genre'].replace(',',' |')} <br>
                     </p>
-                    <p>{movie['Plot']}</p>
                     <div style="margin-top: 10px;">
                         <div style="background-color: #e0e0e0; border-radius: 5px; overflow: hidden;">
                             <div style="width: {imdb_rating * 10}%; background-color: #4caf50; padding: 5px 0; text-align: center; color: white;">
@@ -226,6 +284,8 @@ def display_movie_details(movie):
 def print_movie_details(movie):
     st.markdown(
         f"""
@@ -266,12 +326,13 @@ def load_data():
     movies_df = pd.read_csv(moviesCSV)
     ratings_df = pd.read_csv(ratingsCSV)
     links_df = pd.read_csv(linksCSV)
-    return movies_df, ratings_df, links_df
 # Function to load similarity matrix
 @st.cache_data
-def load_similarity_matrix():
-    with open('Models/similarity_matrix.pkl', 'rb') as f:
         similarity_df = pickle.load(f)
     return similarity_df
@@ -311,7 +372,11 @@ def get_movie_details(movie_id, df_movies, df_ratings, df_links):
 def recommend(movie, similarity_df, movies_df, ratings_df, links_df, k=5):
     try:
         index = movies_df[movies_df['title'] == movie].index[0]
         distances = sorted(list(enumerate(similarity_df.iloc[index])), reverse=True, key=lambda x: x[1])
         recommended_movies = []
         for i in distances[1:k+1]:
             movie_id = movies_df.iloc[i[0]]['movieId']
@@ -325,12 +390,12 @@ def recommend(movie, similarity_df, movies_df, ratings_df, links_df, k=5):
 # Main app
-movies_df, ratings_df, links_df = load_data()
-print("Data loaded successfully")
-print("Loading similarity matrix...")
-similarity_df = load_similarity_matrix()
 def main():
     st.sidebar.title("Navigation")
     menu = ["Login", "Movie Similarity"]
@@ -340,17 +405,25 @@ def main():
         st.title("Movie Recommendations")
         st.write("Welcome to the Movie Recommendation App!")
         st.write("Please login to get personalized movie recommendations. username between (1 and 800)")
-        st.write("leve password blank for now.")
         # Login form
         st.sidebar.header("Login")
-        email = st.sidebar.text_input("Username")
         # password = st.sidebar.text_input("Password", type="password")
         if st.sidebar.button("Login"):
-            if login(email, 'password'):
                 st.sidebar.success("Login successful!")
-                recommendations = user_recommendations.get(email, [])
-                st.write(f"Recommendations for user number {email}:")
                 num_cols = 2
                 cols = st.columns(num_cols)
                 for i, movie_title in enumerate(recommendations):
@@ -376,8 +449,8 @@ def main():
         with cols[1]:
             st.title("Choosen Movie Details:")
             if selected_movie:
-                correct_Name = selected_movie[:-7]
-                movie = fetch_movie_details(correct_Name)
                 if movie['Response'] == 'True':
                     display_movie_details(movie)
                 else:

 import pickle
 import gdown
 import os
+from Helpers import get_user_recommendation , train_model , get_user_recommendation_XGBoost
 # Set page configuration
 # CSV files URLs as raw data from GitHub repository
 moviesCSV = "Data/movies.csv"
 ratingsCSV = "Data/ratings.csv"
 linksCSV = "Data/links.csv"
+# the folloing code is used to download the similarity matrix from google drive if not exist
 # the folloing code is used to download the similarity matrix from google drive if not exist
 file_url = 'https://drive.google.com/uc?id=1-1bpusE96_Hh0rUxU7YmBo6RiwYLQGVy'
+DataBaseCSV = "https://drive.google.com/uc?id=11Soimwc1uKS5VGy_QROifwkdIzl8MZaV"
 output_path = 'Models/similarity_matrix.pkl'
+output_path_DataBase = 'Data/XGBoost_database.csv'
+user_matrix_path = "Models\user_based_matrix.pkl"
 @st.cache_data
 def download_model_from_google_drive(file_url, output_path):
 if not os.path.exists(output_path):
     print("Downloading the similarity matrix from Googlr Drive...")
     # change file permission
+    # os.chmod('Models/', 0o777)
     download_model_from_google_drive(file_url, output_path)
+    download_model_from_google_drive(DataBaseCSV, output_path_DataBase)
     print("Download completed......")
 # Dummy data for user recommendations
 user_recommendations = {
+    1: ["Inception", "The Matrix", "Interstellar"],
+    2: ["The Amazing Spider-Man", "District 9", "Titanic"]
 }
 # Function to hash passwords
 def hash_password(password):
+    pass
 # Dummy user database
 user_db = {
+    1: "password123",
+    2: "mypassword"
 }
 # Login function
+def login(username, password):
+    if isinstance(username, int) and username > 0 and username < 610:
         return True
     return False
 # Function to fetch movie details from OMDb API
+# def fetch_movie_details(title, api_key="23f109b2"):
+#     url = f"http://www.omdbapi.com/?t={title}&apikey={api_key}"
+#     response = requests.get(url)
+#     return response.json()
 # Display movie details
+import re
def _tmdb_to_omdb(tmdb_movie):
    """Map a TMDb movie-details payload onto the OMDb-style keys the UI reads."""
    # These fields can be present but None/empty, so test the value, not the key.
    release_date = tmdb_movie.get('release_date') or ''
    poster_path = tmdb_movie.get('poster_path')
    return {
        'Title': tmdb_movie.get('title') or 'N/A',
        'Year': release_date.split('-')[0] or 'N/A',
        'Rated': 'N/A',  # TMDb doesn't provide rating info in the same way
        'Genre': ', '.join(genre['name'] for genre in tmdb_movie.get('genres') or []),
        'Plot': tmdb_movie.get('overview') or '',
        'Poster': f"https://image.tmdb.org/t/p/w500{poster_path}" if poster_path else '',
        'imdbRating': tmdb_movie.get('vote_average') or 0,
        'imdbID': tmdb_movie.get('imdb_id') or '',
        'Response': 'True',
    }


def fetch_movie_details(title, api_key_omdb="23f109b2", api_key_tmdb="b8c96e534866701532768a313b978c8b", timeout=10):
    """Look up a movie by title on OMDb, falling back to TMDb.

    Args:
        title: Movie title, optionally ending in a " (YYYY)" year suffix,
            which is removed before the lookup.
        api_key_omdb: OMDb API key.
        api_key_tmdb: TMDb API key.
        timeout: Per-request timeout in seconds.

    Returns:
        A dict with OMDb-style keys. ``'Response'`` is ``'True'`` on success;
        otherwise it is ``'False'`` and ``'Error'`` holds a message. Network
        and JSON decoding failures are reported this way rather than raised.

    NOTE(review): the default API keys are committed in source; consider
    loading them from configuration or the environment instead.
    """
    # Remove only a real trailing year suffix; a blind title[:-7] truncates
    # titles that carry no year.
    title = re.sub(r'\s*\(\d{4}\)\s*$', '', title)
    title = title.replace('+', '')
    try:
        # First, try the OMDb API. Passing params lets requests URL-encode
        # the title (spaces, '&', '#', non-ASCII characters).
        movie = requests.get(
            "http://www.omdbapi.com/",
            params={'t': title, 'apikey': api_key_omdb},
            timeout=timeout,
        ).json()
        if movie.get('Response') == 'True':
            return movie

        # If OMDb doesn't find the movie, try the TMDb API.
        search_results = requests.get(
            "https://api.themoviedb.org/3/search/movie",
            params={'api_key': api_key_tmdb, 'query': title},
            timeout=timeout,
        ).json()
        # Error payloads carry no result list, so do not index blindly.
        results = search_results.get('results') or []
        if not results:
            return {'Response': 'False', 'Error': 'Movie not found'}

        movie_id = results[0]['id']
        tmdb_movie = requests.get(
            f"https://api.themoviedb.org/3/movie/{movie_id}",
            params={'api_key': api_key_tmdb},
            timeout=timeout,
        ).json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers JSON decoding failures on older requests versions.
        return {'Response': 'False', 'Error': f'Movie lookup failed: {exc}'}

    # Convert the TMDb response to the same structure as an OMDb response.
    return _tmdb_to_omdb(tmdb_movie)
 def display_movie_details(movie):
     if movie['Response'] == 'False':
         st.write(f"Movie not found: {movie['Error']}")
         movie['imdbRating'] = 0
     imdb_rating = float(movie['imdbRating'])
     url = f"https://www.imdb.com/title/{movie['imdbID']}/"
+    # Split the plot into lines based on . or ,
+    plot_lines = re.split(r'[.,]', movie['Plot'])
+    short_plot = '. '.join(plot_lines[:3]).strip() + '.'
     st.markdown(
         f"""
         <div style="
             background-color: #313131;
+            border-radius: 20px;
+            padding: 20px;
+            margin: 25px 0;
             box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1);
         ">
             <div style="display: flex;">
                     </a>
                 </div>
                 <div style="flex: 3; padding-left: 20px;">
+                    <h3 style="margin: 0;" anchor="{url}">{movie['Title']}</h3>
                     <p style="color: gray;">
                         <b>Year:</b> {movie['Year']} Rated: {movie['Rated']} <br>
+                        <b>Genre:</b> {movie['Genre'].replace(',', ' |')} <br>
                     </p>
+                    <div>{short_plot}</div>
                     <div style="margin-top: 10px;">
                         <div style="background-color: #e0e0e0; border-radius: 5px; overflow: hidden;">
                             <div style="width: {imdb_rating * 10}%; background-color: #4caf50; padding: 5px 0; text-align: center; color: white;">
 def print_movie_details(movie):
     st.markdown(
         f"""
     movies_df = pd.read_csv(moviesCSV)
     ratings_df = pd.read_csv(ratingsCSV)
     links_df = pd.read_csv(linksCSV)
+    DataBase = pd.read_csv(output_path_DataBase)
+    return movies_df, ratings_df, links_df , DataBase
 # Function to load similarity matrix
 @st.cache_data
+def load_similarity_matrix(path):
+    with open(path, 'rb') as f:
         similarity_df = pickle.load(f)
     return similarity_df
 def recommend(movie, similarity_df, movies_df, ratings_df, links_df, k=5):
     try:
         index = movies_df[movies_df['title'] == movie].index[0]
         distances = sorted(list(enumerate(similarity_df.iloc[index])), reverse=True, key=lambda x: x[1])
         recommended_movies = []
         for i in distances[1:k+1]:
             movie_id = movies_df.iloc[i[0]]['movieId']
 # Main app
 def main():
+    movies_df, ratings_df, links_df , DB_df = load_data()
+    print("Data loaded successfully")
+    print("Loading similarity matrix...")
+    similarity_df = load_similarity_matrix(output_path)
     st.sidebar.title("Navigation")
     menu = ["Login", "Movie Similarity"]
         st.title("Movie Recommendations")
         st.write("Welcome to the Movie Recommendation App!")
         st.write("Please login to get personalized movie recommendations. username between (1 and 800)")
+        # model selection
+        C = st.selectbox("Select the model", ["User Similarity Matrix", "XGBoost"])
         # Login form
         st.sidebar.header("Login")
+        username = int(st.sidebar.text_input("Username"))
         # password = st.sidebar.text_input("Password", type="password")
         if st.sidebar.button("Login"):
+            if login(username, 'password'):
                 st.sidebar.success("Login successful!")
+                if C == "User Similarity Matrix":
+                    user_matrix = load_similarity_matrix(user_matrix_path)
+                    recommendations = get_user_recommendation(DB_df, user_matrix, username)
+                elif C == "XGBoost":
+                    model = train_model(DB_df,username)
+                    recommendations , user_seen_movies = get_user_recommendation_XGBoost(DB_df, model, username)
+                else:
+                    recommendations = user_recommendations.get(username, [])
+                st.write(f"Recommendations for user number {username}:")
                 num_cols = 2
                 cols = st.columns(num_cols)
                 for i, movie_title in enumerate(recommendations):
         with cols[1]:
             st.title("Choosen Movie Details:")
             if selected_movie:
+                # correct_Name = selected_movie[:-7]
+                movie = fetch_movie_details(selected_movie)
                 if movie['Response'] == 'True':
                     display_movie_details(movie)
                 else: