Commit
•
9061c90
1
Parent(s):
974afd4
edit app.py
Browse files
app.py
CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
|
|
4 |
import pickle
|
5 |
import gdown
|
6 |
import os
|
7 |
-
|
8 |
|
9 |
|
10 |
# Set page configuration
|
@@ -125,15 +125,22 @@ st.markdown(
|
|
125 |
|
126 |
|
127 |
|
128 |
-
|
129 |
# Local paths to the CSV data files
|
130 |
moviesCSV = "Data/movies.csv"
|
131 |
ratingsCSV = "Data/ratings.csv"
|
132 |
linksCSV = "Data/links.csv"
|
133 |
|
|
|
|
|
|
|
|
|
|
|
134 |
# The following code downloads the similarity matrix from Google Drive if it does not exist locally
|
135 |
file_url = 'https://drive.google.com/uc?id=1-1bpusE96_Hh0rUxU7YmBo6RiwYLQGVy'
|
|
|
136 |
output_path = 'Models/similarity_matrix.pkl'
|
|
|
|
|
137 |
|
138 |
@st.cache_data
|
139 |
def download_model_from_google_drive(file_url, output_path):
|
@@ -144,41 +151,87 @@ def download_model_from_google_drive(file_url, output_path):
|
|
144 |
if not os.path.exists(output_path):
|
145 |
print("Downloading the similarity matrix from Googlr Drive...")
|
146 |
# change file permission
|
147 |
-
os.chmod('Models/', 0o777)
|
148 |
download_model_from_google_drive(file_url, output_path)
|
|
|
|
|
149 |
print("Download completed......")
|
150 |
|
151 |
|
152 |
|
153 |
# Dummy data for user recommendations
|
154 |
user_recommendations = {
|
155 |
-
|
156 |
-
|
157 |
}
|
158 |
|
159 |
# Function to hash passwords
|
160 |
def hash_password(password):
|
161 |
-
|
162 |
|
163 |
# Dummy user database
|
164 |
user_db = {
|
165 |
-
|
166 |
-
|
167 |
}
|
168 |
|
169 |
# Login function
|
170 |
-
def login(
|
171 |
-
if
|
172 |
return True
|
173 |
return False
|
174 |
|
|
|
|
|
|
|
175 |
# Function to fetch movie details from OMDb API
|
176 |
-
def fetch_movie_details(title, api_key="23f109b2"):
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
|
181 |
# Display movie details
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
def display_movie_details(movie):
|
183 |
if movie['Response'] == 'False':
|
184 |
st.write(f"Movie not found: {movie['Error']}")
|
@@ -187,13 +240,18 @@ def display_movie_details(movie):
|
|
187 |
movie['imdbRating'] = 0
|
188 |
imdb_rating = float(movie['imdbRating'])
|
189 |
url = f"https://www.imdb.com/title/{movie['imdbID']}/"
|
|
|
|
|
|
|
|
|
|
|
190 |
st.markdown(
|
191 |
f"""
|
192 |
<div style="
|
193 |
background-color: #313131;
|
194 |
-
border-radius:
|
195 |
-
padding:
|
196 |
-
margin:
|
197 |
box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1);
|
198 |
">
|
199 |
<div style="display: flex;">
|
@@ -204,12 +262,12 @@ def display_movie_details(movie):
|
|
204 |
</a>
|
205 |
</div>
|
206 |
<div style="flex: 3; padding-left: 20px;">
|
207 |
-
<
|
208 |
<p style="color: gray;">
|
209 |
<b>Year:</b> {movie['Year']} Rated: {movie['Rated']} <br>
|
210 |
-
<b>Genre:</b> {movie['Genre'].replace(',',' |')} <br>
|
211 |
</p>
|
212 |
-
<
|
213 |
<div style="margin-top: 10px;">
|
214 |
<div style="background-color: #e0e0e0; border-radius: 5px; overflow: hidden;">
|
215 |
<div style="width: {imdb_rating * 10}%; background-color: #4caf50; padding: 5px 0; text-align: center; color: white;">
|
@@ -226,6 +284,8 @@ def display_movie_details(movie):
|
|
226 |
|
227 |
|
228 |
|
|
|
|
|
229 |
def print_movie_details(movie):
|
230 |
st.markdown(
|
231 |
f"""
|
@@ -266,12 +326,13 @@ def load_data():
|
|
266 |
movies_df = pd.read_csv(moviesCSV)
|
267 |
ratings_df = pd.read_csv(ratingsCSV)
|
268 |
links_df = pd.read_csv(linksCSV)
|
269 |
-
|
|
|
270 |
|
271 |
# Function to load similarity matrix
|
272 |
@st.cache_data
|
273 |
-
def load_similarity_matrix():
|
274 |
-
with open(
|
275 |
similarity_df = pickle.load(f)
|
276 |
return similarity_df
|
277 |
|
@@ -311,7 +372,11 @@ def get_movie_details(movie_id, df_movies, df_ratings, df_links):
|
|
311 |
def recommend(movie, similarity_df, movies_df, ratings_df, links_df, k=5):
|
312 |
try:
|
313 |
index = movies_df[movies_df['title'] == movie].index[0]
|
|
|
|
|
|
|
314 |
distances = sorted(list(enumerate(similarity_df.iloc[index])), reverse=True, key=lambda x: x[1])
|
|
|
315 |
recommended_movies = []
|
316 |
for i in distances[1:k+1]:
|
317 |
movie_id = movies_df.iloc[i[0]]['movieId']
|
@@ -325,12 +390,12 @@ def recommend(movie, similarity_df, movies_df, ratings_df, links_df, k=5):
|
|
325 |
|
326 |
# Main app
|
327 |
|
328 |
-
movies_df, ratings_df, links_df = load_data()
|
329 |
-
print("Data loaded successfully")
|
330 |
-
print("Loading similarity matrix...")
|
331 |
-
similarity_df = load_similarity_matrix()
|
332 |
def main():
|
333 |
|
|
|
|
|
|
|
|
|
334 |
|
335 |
st.sidebar.title("Navigation")
|
336 |
menu = ["Login", "Movie Similarity"]
|
@@ -340,17 +405,25 @@ def main():
|
|
340 |
st.title("Movie Recommendations")
|
341 |
st.write("Welcome to the Movie Recommendation App!")
|
342 |
st.write("Please login to get personalized movie recommendations. username between (1 and 800)")
|
343 |
-
|
344 |
-
|
|
|
345 |
# Login form
|
346 |
st.sidebar.header("Login")
|
347 |
-
|
348 |
# password = st.sidebar.text_input("Password", type="password")
|
349 |
if st.sidebar.button("Login"):
|
350 |
-
if login(
|
351 |
st.sidebar.success("Login successful!")
|
352 |
-
|
353 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
354 |
num_cols = 2
|
355 |
cols = st.columns(num_cols)
|
356 |
for i, movie_title in enumerate(recommendations):
|
@@ -376,8 +449,8 @@ def main():
|
|
376 |
with cols[1]:
|
377 |
st.title("Choosen Movie Details:")
|
378 |
if selected_movie:
|
379 |
-
correct_Name = selected_movie[:-7]
|
380 |
-
movie = fetch_movie_details(
|
381 |
if movie['Response'] == 'True':
|
382 |
display_movie_details(movie)
|
383 |
else:
|
|
|
4 |
import pickle
|
5 |
import gdown
|
6 |
import os
|
7 |
+
from Helpers import get_user_recommendation , train_model , get_user_recommendation_XGBoost
|
8 |
|
9 |
|
10 |
# Set page configuration
|
|
|
125 |
|
126 |
|
127 |
|
|
|
128 |
# Local paths to the CSV data files
|
129 |
moviesCSV = "Data/movies.csv"
|
130 |
ratingsCSV = "Data/ratings.csv"
|
131 |
linksCSV = "Data/links.csv"
|
132 |
|
133 |
+
|
134 |
+
|
135 |
+
|
136 |
+
# The following code downloads the similarity matrix from Google Drive if it does not exist locally
|
137 |
+
|
138 |
# The following code downloads the similarity matrix from Google Drive if it does not exist locally
|
139 |
file_url = 'https://drive.google.com/uc?id=1-1bpusE96_Hh0rUxU7YmBo6RiwYLQGVy'
|
140 |
+
DataBaseCSV = "https://drive.google.com/uc?id=11Soimwc1uKS5VGy_QROifwkdIzl8MZaV"
|
141 |
output_path = 'Models/similarity_matrix.pkl'
|
142 |
+
output_path_DataBase = 'Data/XGBoost_database.csv'
|
143 |
+
# Path of the pickled user-based matrix. A forward slash is used (as in the
# other paths in this file) because a backslash followed by "u" inside a normal
# string literal starts a unicode escape and does not compile.
user_matrix_path = "Models/user_based_matrix.pkl"
|
144 |
|
145 |
@st.cache_data
|
146 |
def download_model_from_google_drive(file_url, output_path):
|
|
|
151 |
# Fetch every required artifact from Google Drive when it is missing locally.
# Each file is checked on its own so that a missing database CSV is still
# downloaded when the similarity matrix is already present (previously both
# downloads were gated on the similarity matrix alone).
for _remote_url, _local_path in (
    (file_url, output_path),
    (DataBaseCSV, output_path_DataBase),
):
    if not os.path.exists(_local_path):
        print(f"Downloading {_local_path} from Google Drive...")
        download_model_from_google_drive(_remote_url, _local_path)
        print("Download completed......")
|
159 |
|
160 |
|
161 |
|
162 |
# Dummy data for user recommendations
|
163 |
user_recommendations = {
|
164 |
+
1: ["Inception", "The Matrix", "Interstellar"],
|
165 |
+
2: ["The Amazing Spider-Man", "District 9", "Titanic"]
|
166 |
}
|
167 |
|
168 |
# Function to hash passwords
|
169 |
def hash_password(password):
    """Placeholder for password hashing.

    Not implemented yet: the argument is ignored and ``None`` is returned.
    """
    return None
|
171 |
|
172 |
# Dummy user database
|
173 |
user_db = {
|
174 |
+
1: "password123",
|
175 |
+
2: "mypassword"
|
176 |
}
|
177 |
|
178 |
# Login function
|
179 |
+
def login(username, password):
    """Return True when ``username`` is an int strictly between 0 and 610.

    ``password`` is accepted but never inspected.
    """
    # NOTE(review): the upper bound is exclusive, so 610 itself is rejected;
    # confirm this matches the highest user id present in the data.
    return isinstance(username, int) and 0 < username < 610
|
183 |
|
184 |
+
|
185 |
+
|
186 |
+
|
187 |
# Function to fetch movie details from OMDb API
|
188 |
+
# def fetch_movie_details(title, api_key="23f109b2"):
|
189 |
+
# url = f"http://www.omdbapi.com/?t={title}&apikey={api_key}"
|
190 |
+
# response = requests.get(url)
|
191 |
+
# return response.json()
|
192 |
|
193 |
# Display movie details
|
194 |
+
|
195 |
+
import re
|
196 |
+
|
197 |
+
def fetch_movie_details(title, api_key_omdb="23f109b2", api_key_tmdb="b8c96e534866701532768a313b978c8b"):
    """Look up a movie by title on OMDb, falling back to TMDb.

    Args:
        title: Movie title, optionally ending in a " (YYYY)" release-year
            suffix; the suffix is removed before searching.
        api_key_omdb: API key sent to OMDb.
        api_key_tmdb: API key sent to TMDb.

    Returns:
        The OMDb JSON dict when OMDb reports ``Response == 'True'``.
        Otherwise a dict built from the first TMDb search hit using the
        OMDb-style keys (Title, Year, Rated, Genre, Plot, Poster,
        imdbRating, imdbID, Response). When neither service yields a match,
        ``{'Response': 'False', 'Error': 'Movie not found'}``.

    Raises:
        Network and timeout errors raised by ``requests`` propagate to the
        caller.
    """
    # NOTE(review): the default API keys are hard-coded in source; consider
    # loading them from configuration or secrets instead.
    timeout_seconds = 10

    # Remove the year suffix only when it is actually present, rather than
    # unconditionally dropping the last seven characters of the title.
    query = re.sub(r'\s*\(\d{4}\)\s*$', '', title).strip()
    if not query:
        return {'Response': 'False', 'Error': 'Movie not found'}

    # First, try the OMDb API. Passing the title through ``params`` lets
    # requests percent-encode it, so characters such as '&', '#' or '+' can no
    # longer corrupt the query string (and '+' no longer needs stripping).
    response_omdb = requests.get(
        "http://www.omdbapi.com/",
        params={'t': query, 'apikey': api_key_omdb},
        timeout=timeout_seconds,
    )
    movie = response_omdb.json()
    if movie.get('Response') == 'True':
        return movie

    # If OMDb doesn't find the movie, try the TMDb search API.
    response_tmdb_search = requests.get(
        "https://api.themoviedb.org/3/search/movie",
        params={'api_key': api_key_tmdb, 'query': query},
        timeout=timeout_seconds,
    )
    # An error payload has no 'results' key; treat it the same as "no hits".
    results = response_tmdb_search.json().get('results') or []
    if not results:
        return {'Response': 'False', 'Error': 'Movie not found'}

    movie_id = results[0]['id']
    response_tmdb_movie = requests.get(
        f"https://api.themoviedb.org/3/movie/{movie_id}",
        params={'api_key': api_key_tmdb},
        timeout=timeout_seconds,
    )
    tmdb_movie = response_tmdb_movie.json()

    # These fields can be present but null/empty, so test the value itself
    # instead of only testing that the key exists.
    release_date = tmdb_movie.get('release_date') or ''
    poster_path = tmdb_movie.get('poster_path')

    # Convert the TMDb response to the same structure as an OMDb response.
    return {
        'Title': tmdb_movie.get('title', query),
        'Year': release_date.split('-')[0] or 'N/A',
        'Rated': 'N/A',  # TMDb doesn't provide rating info in the same way
        'Genre': ', '.join(genre['name'] for genre in tmdb_movie.get('genres') or []),
        'Plot': tmdb_movie.get('overview') or '',
        'Poster': f"https://image.tmdb.org/t/p/w500{poster_path}" if poster_path else '',
        'imdbRating': tmdb_movie.get('vote_average', 0),
        'imdbID': tmdb_movie.get('imdb_id'),
        'Response': 'True',
    }
|
234 |
+
|
235 |
def display_movie_details(movie):
|
236 |
if movie['Response'] == 'False':
|
237 |
st.write(f"Movie not found: {movie['Error']}")
|
|
|
240 |
movie['imdbRating'] = 0
|
241 |
imdb_rating = float(movie['imdbRating'])
|
242 |
url = f"https://www.imdb.com/title/{movie['imdbID']}/"
|
243 |
+
|
244 |
+
# Split the plot into lines based on . or ,
|
245 |
+
plot_lines = re.split(r'[.,]', movie['Plot'])
|
246 |
+
short_plot = '. '.join(plot_lines[:3]).strip() + '.'
|
247 |
+
|
248 |
st.markdown(
|
249 |
f"""
|
250 |
<div style="
|
251 |
background-color: #313131;
|
252 |
+
border-radius: 20px;
|
253 |
+
padding: 20px;
|
254 |
+
margin: 25px 0;
|
255 |
box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1);
|
256 |
">
|
257 |
<div style="display: flex;">
|
|
|
262 |
</a>
|
263 |
</div>
|
264 |
<div style="flex: 3; padding-left: 20px;">
|
265 |
+
<h3 style="margin: 0;" anchor="{url}">{movie['Title']}</h3>
|
266 |
<p style="color: gray;">
|
267 |
<b>Year:</b> {movie['Year']} Rated: {movie['Rated']} <br>
|
268 |
+
<b>Genre:</b> {movie['Genre'].replace(',', ' |')} <br>
|
269 |
</p>
|
270 |
+
<div>{short_plot}</div>
|
271 |
<div style="margin-top: 10px;">
|
272 |
<div style="background-color: #e0e0e0; border-radius: 5px; overflow: hidden;">
|
273 |
<div style="width: {imdb_rating * 10}%; background-color: #4caf50; padding: 5px 0; text-align: center; color: white;">
|
|
|
284 |
|
285 |
|
286 |
|
287 |
+
|
288 |
+
|
289 |
def print_movie_details(movie):
|
290 |
st.markdown(
|
291 |
f"""
|
|
|
326 |
movies_df = pd.read_csv(moviesCSV)
|
327 |
ratings_df = pd.read_csv(ratingsCSV)
|
328 |
links_df = pd.read_csv(linksCSV)
|
329 |
+
DataBase = pd.read_csv(output_path_DataBase)
|
330 |
+
return movies_df, ratings_df, links_df , DataBase
|
331 |
|
332 |
# Function to load similarity matrix
|
333 |
@st.cache_data
def load_similarity_matrix(path):
    """Unpickle and return the object stored in the file at ``path``.

    The function is wrapped in ``st.cache_data``.
    """
    # NOTE(review): pickle.load can execute arbitrary code from the file; the
    # similarity matrix is downloaded from Google Drive elsewhere in this
    # file, so confirm every path passed here comes from a trusted source.
    with open(path, 'rb') as pickle_file:
        return pickle.load(pickle_file)
|
338 |
|
|
|
372 |
def recommend(movie, similarity_df, movies_df, ratings_df, links_df, k=5):
|
373 |
try:
|
374 |
index = movies_df[movies_df['title'] == movie].index[0]
|
375 |
+
|
376 |
+
|
377 |
+
|
378 |
distances = sorted(list(enumerate(similarity_df.iloc[index])), reverse=True, key=lambda x: x[1])
|
379 |
+
|
380 |
recommended_movies = []
|
381 |
for i in distances[1:k+1]:
|
382 |
movie_id = movies_df.iloc[i[0]]['movieId']
|
|
|
390 |
|
391 |
# Main app
|
392 |
|
|
|
|
|
|
|
|
|
393 |
def main():
|
394 |
|
395 |
+
movies_df, ratings_df, links_df , DB_df = load_data()
|
396 |
+
print("Data loaded successfully")
|
397 |
+
print("Loading similarity matrix...")
|
398 |
+
similarity_df = load_similarity_matrix(output_path)
|
399 |
|
400 |
st.sidebar.title("Navigation")
|
401 |
menu = ["Login", "Movie Similarity"]
|
|
|
405 |
st.title("Movie Recommendations")
|
406 |
st.write("Welcome to the Movie Recommendation App!")
|
407 |
st.write("Please login to get personalized movie recommendations. username between (1 and 800)")
|
408 |
+
# model selection
|
409 |
+
C = st.selectbox("Select the model", ["User Similarity Matrix", "XGBoost"])
|
410 |
+
|
411 |
# Login form
|
412 |
st.sidebar.header("Login")
|
413 |
+
username = int(st.sidebar.text_input("Username"))
|
414 |
# password = st.sidebar.text_input("Password", type="password")
|
415 |
if st.sidebar.button("Login"):
|
416 |
+
if login(username, 'password'):
|
417 |
st.sidebar.success("Login successful!")
|
418 |
+
if C == "User Similarity Matrix":
|
419 |
+
user_matrix = load_similarity_matrix(user_matrix_path)
|
420 |
+
recommendations = get_user_recommendation(DB_df, user_matrix, username)
|
421 |
+
elif C == "XGBoost":
|
422 |
+
model = train_model(DB_df,username)
|
423 |
+
recommendations , user_seen_movies = get_user_recommendation_XGBoost(DB_df, model, username)
|
424 |
+
else:
|
425 |
+
recommendations = user_recommendations.get(username, [])
|
426 |
+
st.write(f"Recommendations for user number {username}:")
|
427 |
num_cols = 2
|
428 |
cols = st.columns(num_cols)
|
429 |
for i, movie_title in enumerate(recommendations):
|
|
|
449 |
with cols[1]:
|
450 |
st.title("Choosen Movie Details:")
|
451 |
if selected_movie:
|
452 |
+
# correct_Name = selected_movie[:-7]
|
453 |
+
movie = fetch_movie_details(selected_movie)
|
454 |
if movie['Response'] == 'True':
|
455 |
display_movie_details(movie)
|
456 |
else:
|