MohamedMotaz committed on
Commit
9061c90
1 Parent(s): 974afd4

edit app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -35
app.py CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
4
  import pickle
5
  import gdown
6
  import os
7
-
8
 
9
 
10
  # Set page configuration
@@ -125,15 +125,22 @@ st.markdown(
125
 
126
 
127
 
128
-
129
  # CSV files URLs as raw data from GitHub repository
130
  moviesCSV = "Data/movies.csv"
131
  ratingsCSV = "Data/ratings.csv"
132
  linksCSV = "Data/links.csv"
133
 
 
 
 
 
 
134
  # the folloing code is used to download the similarity matrix from google drive if not exist
135
  file_url = 'https://drive.google.com/uc?id=1-1bpusE96_Hh0rUxU7YmBo6RiwYLQGVy'
 
136
  output_path = 'Models/similarity_matrix.pkl'
 
 
137
 
138
  @st.cache_data
139
  def download_model_from_google_drive(file_url, output_path):
@@ -144,41 +151,87 @@ def download_model_from_google_drive(file_url, output_path):
144
  if not os.path.exists(output_path):
145
  print("Downloading the similarity matrix from Googlr Drive...")
146
  # change file permission
147
- os.chmod('Models/', 0o777)
148
  download_model_from_google_drive(file_url, output_path)
 
 
149
  print("Download completed......")
150
 
151
 
152
 
153
  # Dummy data for user recommendations
154
  user_recommendations = {
155
- "1": ["Inception", "The Matrix", "Interstellar"],
156
- "2": ["The Amazing Spider-Man", "District 9", "Titanic"]
157
  }
158
 
159
  # Function to hash passwords
160
  def hash_password(password):
161
- return password
162
 
163
  # Dummy user database
164
  user_db = {
165
- "1": hash_password("password123"),
166
- "2": hash_password("mypassword")
167
  }
168
 
169
  # Login function
170
- def login(email, password):
171
- if email in user_db:
172
  return True
173
  return False
174
 
 
 
 
175
  # Function to fetch movie details from OMDb API
176
- def fetch_movie_details(title, api_key="23f109b2"):
177
- url = f"http://www.omdbapi.com/?t={title}&apikey={api_key}"
178
- response = requests.get(url)
179
- return response.json()
180
 
181
  # Display movie details
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  def display_movie_details(movie):
183
  if movie['Response'] == 'False':
184
  st.write(f"Movie not found: {movie['Error']}")
@@ -187,13 +240,18 @@ def display_movie_details(movie):
187
  movie['imdbRating'] = 0
188
  imdb_rating = float(movie['imdbRating'])
189
  url = f"https://www.imdb.com/title/{movie['imdbID']}/"
 
 
 
 
 
190
  st.markdown(
191
  f"""
192
  <div style="
193
  background-color: #313131;
194
- border-radius: 15px;
195
- padding: 10px;
196
- margin: 10px 0;
197
  box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1);
198
  ">
199
  <div style="display: flex;">
@@ -204,12 +262,12 @@ def display_movie_details(movie):
204
  </a>
205
  </div>
206
  <div style="flex: 3; padding-left: 20px;">
207
- <h2 style="margin: 0;" anchor="{url}">{movie['Title']}</h2>
208
  <p style="color: gray;">
209
  <b>Year:</b> {movie['Year']} Rated: {movie['Rated']} <br>
210
- <b>Genre:</b> {movie['Genre'].replace(',',' |')} <br>
211
  </p>
212
- <p>{movie['Plot']}</p>
213
  <div style="margin-top: 10px;">
214
  <div style="background-color: #e0e0e0; border-radius: 5px; overflow: hidden;">
215
  <div style="width: {imdb_rating * 10}%; background-color: #4caf50; padding: 5px 0; text-align: center; color: white;">
@@ -226,6 +284,8 @@ def display_movie_details(movie):
226
 
227
 
228
 
 
 
229
  def print_movie_details(movie):
230
  st.markdown(
231
  f"""
@@ -266,12 +326,13 @@ def load_data():
266
  movies_df = pd.read_csv(moviesCSV)
267
  ratings_df = pd.read_csv(ratingsCSV)
268
  links_df = pd.read_csv(linksCSV)
269
- return movies_df, ratings_df, links_df
 
270
 
271
  # Function to load similarity matrix
272
  @st.cache_data
273
- def load_similarity_matrix():
274
- with open('Models/similarity_matrix.pkl', 'rb') as f:
275
  similarity_df = pickle.load(f)
276
  return similarity_df
277
 
@@ -311,7 +372,11 @@ def get_movie_details(movie_id, df_movies, df_ratings, df_links):
311
  def recommend(movie, similarity_df, movies_df, ratings_df, links_df, k=5):
312
  try:
313
  index = movies_df[movies_df['title'] == movie].index[0]
 
 
 
314
  distances = sorted(list(enumerate(similarity_df.iloc[index])), reverse=True, key=lambda x: x[1])
 
315
  recommended_movies = []
316
  for i in distances[1:k+1]:
317
  movie_id = movies_df.iloc[i[0]]['movieId']
@@ -325,12 +390,12 @@ def recommend(movie, similarity_df, movies_df, ratings_df, links_df, k=5):
325
 
326
  # Main app
327
 
328
- movies_df, ratings_df, links_df = load_data()
329
- print("Data loaded successfully")
330
- print("Loading similarity matrix...")
331
- similarity_df = load_similarity_matrix()
332
  def main():
333
 
 
 
 
 
334
 
335
  st.sidebar.title("Navigation")
336
  menu = ["Login", "Movie Similarity"]
@@ -340,17 +405,25 @@ def main():
340
  st.title("Movie Recommendations")
341
  st.write("Welcome to the Movie Recommendation App!")
342
  st.write("Please login to get personalized movie recommendations. username between (1 and 800)")
343
- st.write("leve password blank for now.")
344
-
 
345
  # Login form
346
  st.sidebar.header("Login")
347
- email = st.sidebar.text_input("Username")
348
  # password = st.sidebar.text_input("Password", type="password")
349
  if st.sidebar.button("Login"):
350
- if login(email, 'password'):
351
  st.sidebar.success("Login successful!")
352
- recommendations = user_recommendations.get(email, [])
353
- st.write(f"Recommendations for user number {email}:")
 
 
 
 
 
 
 
354
  num_cols = 2
355
  cols = st.columns(num_cols)
356
  for i, movie_title in enumerate(recommendations):
@@ -376,8 +449,8 @@ def main():
376
  with cols[1]:
377
  st.title("Choosen Movie Details:")
378
  if selected_movie:
379
- correct_Name = selected_movie[:-7]
380
- movie = fetch_movie_details(correct_Name)
381
  if movie['Response'] == 'True':
382
  display_movie_details(movie)
383
  else:
 
4
  import pickle
5
  import gdown
6
  import os
7
+ from Helpers import get_user_recommendation , train_model , get_user_recommendation_XGBoost
8
 
9
 
10
  # Set page configuration
 
125
 
126
 
127
 
 
128
  # Paths to the CSV data files, relative to the app's working directory
129
  moviesCSV = "Data/movies.csv"
130
  ratingsCSV = "Data/ratings.csv"
131
  linksCSV = "Data/links.csv"
132
 
133
+
134
+
135
+
136
# Google Drive sources and the local destinations they are saved to when the
# files are not already present on disk.
file_url = 'https://drive.google.com/uc?id=1-1bpusE96_Hh0rUxU7YmBo6RiwYLQGVy'
DataBaseCSV = "https://drive.google.com/uc?id=11Soimwc1uKS5VGy_QROifwkdIzl8MZaV"
output_path = 'Models/similarity_matrix.pkl'
output_path_DataBase = 'Data/XGBoost_database.csv'

# Pickle loaded for the "User Similarity Matrix" model.
# Forward slash on purpose: inside a normal string literal a backslash followed
# by "u" starts a unicode escape, so the Windows-style spelling of this path is
# a SyntaxError. "/" also matches the other paths above and works on every OS.
user_matrix_path = "Models/user_based_matrix.pkl"
144
 
145
  @st.cache_data
146
  def download_model_from_google_drive(file_url, output_path):
 
151
# Fetch whichever artefacts are missing from Google Drive.
# Each file is checked on its own: previously the database CSV was only
# downloaded when the similarity matrix was absent, so a checkout that already
# had the matrix but not the CSV failed later when the CSV was read.
for _url, _path in (
    (file_url, output_path),
    (DataBaseCSV, output_path_DataBase),
):
    if os.path.exists(_path):
        continue
    print(f"Downloading {_path} from Google Drive...")
    # Make sure the destination folder exists before the download writes to it.
    os.makedirs(os.path.dirname(_path) or ".", exist_ok=True)
    # NOTE(review): assumes the helper saves `_url` to `_path`; its body is not
    # visible in this diff -- confirm.
    download_model_from_google_drive(_url, _path)
    print("Download completed......")
159
 
160
 
161
 
162
  # Dummy data for user recommendations
163
  user_recommendations = {
164
+ 1: ["Inception", "The Matrix", "Interstellar"],
165
+ 2: ["The Amazing Spider-Man", "District 9", "Titanic"]
166
  }
167
 
168
  # Function to hash passwords
169
def hash_password(password):
    """Return the hex-encoded SHA-256 digest of *password*.

    The previous body was a bare ``pass``, so every call returned ``None``
    instead of a hash.

    Args:
        password: The plain-text password as a ``str``.

    Returns:
        A 64-character lowercase hexadecimal string.

    NOTE(review): an unsalted SHA-256 digest is only adequate for the dummy
    user table in this demo. Real credentials need a salted key-derivation
    function such as ``hashlib.scrypt`` or ``hashlib.pbkdf2_hmac``.
    """
    # Imported locally so the function does not depend on the file's import block.
    import hashlib

    return hashlib.sha256(password.encode("utf-8")).hexdigest()
171
 
172
  # Dummy user database
173
  user_db = {
174
+ 1: "password123",
175
+ 2: "mypassword"
176
  }
177
 
178
  # Login function
179
def login(username, password):
    """Return True when *username* is an accepted numeric user id.

    Args:
        username: Candidate user id. Only ``int`` values in the range
            1..609 (inclusive) are accepted.
        password: Currently ignored; no password check is performed.

    Returns:
        ``True`` if the id is accepted, ``False`` otherwise.

    NOTE(review): the welcome text shown to the user advertises ids
    "between (1 and 800)" while this check stops at 609 -- confirm which
    bound is intended.
    """
    # bool is a subclass of int, so without this guard login(True, ...) would
    # be accepted as user 1.
    if isinstance(username, bool) or not isinstance(username, int):
        return False
    return 0 < username < 610
183
 
184
+
185
+
186
+
187
  # Function to fetch movie details from OMDb API
188
+ # def fetch_movie_details(title, api_key="23f109b2"):
189
+ # url = f"http://www.omdbapi.com/?t={title}&apikey={api_key}"
190
+ # response = requests.get(url)
191
+ # return response.json()
192
 
193
  # Display movie details
194
+
195
+ import re
196
+
197
def _get_json(url, params, timeout=10):
    """GET *url* with *params* and return the decoded JSON object, or None on failure."""
    try:
        # Passing `params` lets requests URL-encode spaces, '&', '#', etc.
        response = requests.get(url, params=params, timeout=timeout)
        payload = response.json()
    except (requests.RequestException, ValueError):
        return None
    return payload if isinstance(payload, dict) else None


def _tmdb_to_omdb(tmdb_movie):
    """Reshape a TMDb movie-details payload into the OMDb-style dict the app displays."""
    release_date = tmdb_movie.get('release_date') or ''
    poster_path = tmdb_movie.get('poster_path')
    return {
        'Title': tmdb_movie['title'],
        # An empty release date must not produce an empty year.
        'Year': release_date.split('-')[0] or 'N/A',
        'Rated': 'N/A',  # TMDb doesn't provide rating info in the same way
        'Genre': ', '.join(genre['name'] for genre in tmdb_movie.get('genres') or []),
        'Plot': tmdb_movie.get('overview') or '',
        # poster_path can be present but null; avoid building ".../w500None".
        'Poster': f"https://image.tmdb.org/t/p/w500{poster_path}" if poster_path else '',
        'imdbRating': tmdb_movie.get('vote_average') or 0,
        'imdbID': tmdb_movie.get('imdb_id') or '',
        'Response': 'True',
    }


def fetch_movie_details(title, api_key_omdb="23f109b2", api_key_tmdb="b8c96e534866701532768a313b978c8b"):
    """Look up a movie by title on OMDb, falling back to TMDb.

    Args:
        title: Movie title, optionally followed by a " (YYYY)" year suffix,
            which is removed before searching.
        api_key_omdb: OMDb API key.
        api_key_tmdb: TMDb API key.

    Returns:
        A dict in OMDb response shape. On success ``'Response'`` is ``'True'``.
        When the title is empty, neither service has a match, or a request
        fails, the result is ``{'Response': 'False', 'Error': 'Movie not found'}``.

    NOTE(review): both API keys are committed as default arguments; consider
    loading them from the environment or Streamlit secrets instead.
    """
    not_found = {'Response': 'False', 'Error': 'Movie not found'}

    # Strip the year suffix only when it is really there; a blind slice of the
    # last seven characters mangles titles that carry no year.
    title = re.sub(r'\s*\(\d{4}\)\s*$', '', title or '')
    # '+' is still dropped as before -- presumably because it used to be read
    # as a space in the hand-built query string.
    title = title.replace('+', '').strip()
    if not title:
        return not_found

    # First, try the OMDb API (over HTTPS so the key is not sent in clear text).
    movie = _get_json("https://www.omdbapi.com/", {'t': title, 'apikey': api_key_omdb})
    if movie and movie.get('Response') == 'True':
        return movie

    # If OMDb doesn't find the movie, search TMDb and take the first hit.
    search = _get_json(
        "https://api.themoviedb.org/3/search/movie",
        {'api_key': api_key_tmdb, 'query': title},
    )
    results = (search or {}).get('results') or []
    if not results:
        return not_found

    tmdb_movie = _get_json(
        f"https://api.themoviedb.org/3/movie/{results[0]['id']}",
        {'api_key': api_key_tmdb},
    )
    # An error payload from TMDb has no 'title'; treat it as "not found".
    if not tmdb_movie or 'title' not in tmdb_movie:
        return not_found
    return _tmdb_to_omdb(tmdb_movie)
234
+
235
  def display_movie_details(movie):
236
  if movie['Response'] == 'False':
237
  st.write(f"Movie not found: {movie['Error']}")
 
240
  movie['imdbRating'] = 0
241
  imdb_rating = float(movie['imdbRating'])
242
  url = f"https://www.imdb.com/title/{movie['imdbID']}/"
243
+
244
+ # Split the plot into lines based on . or ,
245
+ plot_lines = re.split(r'[.,]', movie['Plot'])
246
+ short_plot = '. '.join(plot_lines[:3]).strip() + '.'
247
+
248
  st.markdown(
249
  f"""
250
  <div style="
251
  background-color: #313131;
252
+ border-radius: 20px;
253
+ padding: 20px;
254
+ margin: 25px 0;
255
  box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1);
256
  ">
257
  <div style="display: flex;">
 
262
  </a>
263
  </div>
264
  <div style="flex: 3; padding-left: 20px;">
265
+ <h3 style="margin: 0;" anchor="{url}">{movie['Title']}</h3>
266
  <p style="color: gray;">
267
  <b>Year:</b> {movie['Year']} Rated: {movie['Rated']} <br>
268
+ <b>Genre:</b> {movie['Genre'].replace(',', ' |')} <br>
269
  </p>
270
+ <div>{short_plot}</div>
271
  <div style="margin-top: 10px;">
272
  <div style="background-color: #e0e0e0; border-radius: 5px; overflow: hidden;">
273
  <div style="width: {imdb_rating * 10}%; background-color: #4caf50; padding: 5px 0; text-align: center; color: white;">
 
284
 
285
 
286
 
287
+
288
+
289
  def print_movie_details(movie):
290
  st.markdown(
291
  f"""
 
326
  movies_df = pd.read_csv(moviesCSV)
327
  ratings_df = pd.read_csv(ratingsCSV)
328
  links_df = pd.read_csv(linksCSV)
329
+ DataBase = pd.read_csv(output_path_DataBase)
330
+ return movies_df, ratings_df, links_df , DataBase
331
 
332
  # Function to load similarity matrix
333
@st.cache_data
def load_similarity_matrix(path):
    """Unpickle and return the object stored at *path*.

    Args:
        path: Location of a pickle file on disk.

    Returns:
        Whatever object the pickle contains.

    Raises:
        OSError: If the file cannot be opened.
    """
    # SECURITY NOTE(review): pickle.load can execute arbitrary code embedded in
    # the file. The default matrix is downloaded from Google Drive at startup,
    # so only point this at files whose origin is trusted.
    with open(path, 'rb') as f:
        return pickle.load(f)
338
 
 
372
  def recommend(movie, similarity_df, movies_df, ratings_df, links_df, k=5):
373
  try:
374
  index = movies_df[movies_df['title'] == movie].index[0]
375
+
376
+
377
+
378
  distances = sorted(list(enumerate(similarity_df.iloc[index])), reverse=True, key=lambda x: x[1])
379
+
380
  recommended_movies = []
381
  for i in distances[1:k+1]:
382
  movie_id = movies_df.iloc[i[0]]['movieId']
 
390
 
391
  # Main app
392
 
 
 
 
 
393
  def main():
394
 
395
+ movies_df, ratings_df, links_df , DB_df = load_data()
396
+ print("Data loaded successfully")
397
+ print("Loading similarity matrix...")
398
+ similarity_df = load_similarity_matrix(output_path)
399
 
400
  st.sidebar.title("Navigation")
401
  menu = ["Login", "Movie Similarity"]
 
405
  st.title("Movie Recommendations")
406
  st.write("Welcome to the Movie Recommendation App!")
407
  st.write("Please login to get personalized movie recommendations. username between (1 and 800)")
408
+ # model selection
409
+ C = st.selectbox("Select the model", ["User Similarity Matrix", "XGBoost"])
410
+
411
  # Login form
412
  st.sidebar.header("Login")
413
+ username = int(st.sidebar.text_input("Username"))
414
  # password = st.sidebar.text_input("Password", type="password")
415
  if st.sidebar.button("Login"):
416
+ if login(username, 'password'):
417
  st.sidebar.success("Login successful!")
418
+ if C == "User Similarity Matrix":
419
+ user_matrix = load_similarity_matrix(user_matrix_path)
420
+ recommendations = get_user_recommendation(DB_df, user_matrix, username)
421
+ elif C == "XGBoost":
422
+ model = train_model(DB_df,username)
423
+ recommendations , user_seen_movies = get_user_recommendation_XGBoost(DB_df, model, username)
424
+ else:
425
+ recommendations = user_recommendations.get(username, [])
426
+ st.write(f"Recommendations for user number {username}:")
427
  num_cols = 2
428
  cols = st.columns(num_cols)
429
  for i, movie_title in enumerate(recommendations):
 
449
  with cols[1]:
450
  st.title("Choosen Movie Details:")
451
  if selected_movie:
452
+ # correct_Name = selected_movie[:-7]
453
+ movie = fetch_movie_details(selected_movie)
454
  if movie['Response'] == 'True':
455
  display_movie_details(movie)
456
  else: