Eng-AsmaaYosef
committed on
Commit
•
f86110e
1
Parent(s):
65c03cd
Upload 2 files
Browse files- Helpers.py +112 -0
- app.py +507 -0
Helpers.py
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
4 |
+
from sklearn.metrics import mean_squared_error
|
5 |
+
from xgboost import XGBRegressor
|
6 |
+
|
7 |
+
|
8 |
+
def train_model(data, user_id, test=None, eval=False):
    """Fit an XGBoost regressor on the rating rows of a single user.

    Args:
        data: DataFrame containing the columns ``userId``, ``rating``,
            ``Train`` and ``title``; every other column is used as a feature.
        user_id: Value compared against ``data['userId']`` to pick the
            training rows.
        test: Optional DataFrame with the same columns as ``data``. Only read
            when ``eval`` is true.
        eval: When true, print the RMSE of the fitted model on this user's
            rows of ``test``.

    Returns:
        The fitted ``XGBRegressor``.

    Raises:
        ValueError: If ``eval`` is true and ``test`` is ``None``.
    """
    non_feature_columns = ['userId', 'rating', 'Train', 'title']

    # Select only this user's rows.
    train_user = data[data['userId'] == user_id]
    X_train = train_user.drop(columns=non_feature_columns)
    y_train = train_user['rating']

    model = XGBRegressor()
    model.fit(X_train, y_train)

    if eval:
        if test is None:
            raise ValueError("test data is required when eval=True")

        test_user = test[test['userId'] == user_id]
        X_test = test_user.drop(columns=non_feature_columns)
        y_test = test_user['rating']
        y_pred = model.predict(X_test)

        # Take the square root with ** 0.5: this module does not import
        # numpy, so the former np.sqrt call raised NameError.
        rmse = float(mean_squared_error(y_test, y_pred)) ** 0.5
        print(f'RMSE: {rmse:.4f}')

    return model
|
34 |
+
|
35 |
+
def get_user_recommendation_XGBoost(all_moves, model, user_id, n=10):
    """Rank the titles a user has not rated by the model's predicted rating.

    Args:
        all_moves: DataFrame containing the columns ``userId``, ``rating``,
            ``Train`` and ``title``; every other column is passed to the
            model as a feature.
        model: Object exposing ``predict(features)`` that returns one
            prediction per row.
        user_id: Value compared against ``all_moves['userId']``.
        n: Maximum number of titles to return.

    Returns:
        A tuple ``(recommendations, user_seen_movies)``: a Series with up to
        ``n`` unseen titles ordered by descending predicted rating, and the
        Series of titles found in the user's own rows.
    """
    user_seen_movies = all_moves[all_moves['userId'] == user_id]['title']

    # Keep one row per unseen title. The explicit copy makes the frame
    # independent of ``all_moves`` so adding a column below neither touches
    # the caller's data nor triggers pandas' chained-assignment warning.
    user_unseen_movies = (
        all_moves[~all_moves['title'].isin(user_seen_movies)]
        .drop_duplicates(subset=['title'])
        .copy()
    )

    # Nothing left to score: return an empty selection instead of handing the
    # model an empty feature matrix.
    if user_unseen_movies.empty:
        return user_unseen_movies['title'], user_seen_movies

    features = user_unseen_movies.drop(columns=['userId', 'rating', 'Train', 'title'])
    user_unseen_movies['Pred_rating'] = model.predict(features)

    recommendations = (
        user_unseen_movies
        .sort_values(by='Pred_rating', ascending=False)
        .head(n)['title']
    )
    return recommendations, user_seen_movies
|
51 |
+
|
52 |
+
|
53 |
+
|
54 |
+
def get_user_recommendation(DataBase, Matrix, user_id, l=10):
    """Recommend titles that the most similar users rated 4 or higher.

    Args:
        DataBase: DataFrame containing the columns ``userId``, ``title`` and
            ``rating``.
        Matrix: Object indexable as ``Matrix[user_id]`` that yields a Series
            of similarity scores labelled by user id.
        user_id: The user to build recommendations for.
        l: Maximum number of titles to return.

    Returns:
        A list of at most ``l`` distinct titles the user has not rated,
        collected neighbour by neighbour in order of decreasing similarity.
    """
    max_neighbours = 9      # how many similar users are consulted
    per_neighbour = 4       # how many titles each of them may contribute

    if l <= 0:
        return []

    # Remove the target user by label instead of assuming they sort first:
    # another user with an equal score could otherwise take that slot.
    similarities = Matrix[user_id].drop(labels=[user_id], errors='ignore')
    neighbours = similarities.sort_values(ascending=False).index[:max_neighbours]

    user_seen_movies = DataBase[DataBase['userId'] == user_id]['title']

    # Candidate rows: rated 4 or higher and not already seen by the user.
    liked_unseen = DataBase[
        (DataBase['rating'] >= 4) & ~DataBase['title'].isin(user_seen_movies)
    ]

    recommendations = []
    for neighbour in neighbours:
        neighbour_rows = liked_unseen[liked_unseen['userId'] == neighbour]
        # Sort on the rating column (the previous code sorted the title
        # strings themselves); a stable sort keeps ties in input order.
        best_titles = (
            neighbour_rows
            .sort_values(by='rating', ascending=False, kind='stable')['title']
            .head(per_neighbour)
        )
        for movie in best_titles:
            if movie not in recommendations:
                recommendations.append(movie)
                # Stop as soon as the quota is met so the list never
                # exceeds ``l``.
                if len(recommendations) >= l:
                    return recommendations

    return recommendations
|
85 |
+
|
86 |
+
|
87 |
+
def get_recommendation_item(dataBase, matrix, movie_name, n=10):
    """Return the highest similarity scores for ``movie_name``.

    Args:
        dataBase: DataFrame with a ``number_of_ratings`` column; its index
            labels are matched against the labels of the similarity scores.
        matrix: Object indexable as ``matrix[movie_name]`` that yields a
            Series of similarity scores.
        movie_name: Key looked up in ``matrix``.
        n: Number of leading entries to keep.

    Returns:
        The first ``n`` scores, in descending order, whose labels belong to
        ``dataBase`` rows with more than 100 ratings.
    """
    ranked_scores = matrix[movie_name].sort_values(ascending=False)

    # Labels of the rows that have more than 100 ratings.
    popular_rows = dataBase[dataBase['number_of_ratings'] > 100]
    is_popular = ranked_scores.index.isin(popular_rows.index)

    popular_scores = ranked_scores[is_popular]
    return popular_scores[:n]
|
94 |
+
|
95 |
+
|
96 |
+
if __name__ == '__main__':
    # Manual smoke test: load the database and the user-based similarity
    # matrix from local files and print the recommendations for user 1.
    import pickle

    def load_similarity_matrix(path):
        """Unpickle and return the object stored at ``path``."""
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    # Load the data
    DataBaseCSV = r"D:\Study\ITI\Recommender Systems\Final\Movies-Recommender-System\Data\XGBoost_database.csv"
    DataBase = pd.read_csv(DataBaseCSV)

    # Load the similarity matrix
    MatrixCSV = r"D:\Study\ITI\Recommender Systems\Final\Movies-Recommender-System\Models\user_based_matrix.pkl"
    Matrix = load_similarity_matrix(MatrixCSV)

    recommendations = get_user_recommendation(DataBase, Matrix, 1)
    print(recommendations)
|
112 |
+
|
app.py
ADDED
@@ -0,0 +1,507 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import requests
|
3 |
+
import pandas as pd
|
4 |
+
import pickle
|
5 |
+
import gdown
|
6 |
+
import os
|
7 |
+
from Helpers import get_user_recommendation , train_model , get_user_recommendation_XGBoost ,get_recommendation_item
|
8 |
+
|
9 |
+
|
10 |
+
# Set page configuration
|
11 |
+
st.set_page_config(
    page_title="Movie Recommendation",
    page_icon="🎬",
    layout="wide",
)

# Global stylesheet injected once at start-up: page background, headings,
# recommendation cards, the movie-details layout, links and sidebar widgets.
_PAGE_CSS = """
<style>
body {
    background-image: url("https://repository-images.githubusercontent.com/275336521/20d38e00-6634-11eb-9d1f-6a5232d0f84f");
    color: #FFFFFF;
    font-family: 'Arial', sans-serif;
}

.stApp {
    background: rgba(0, 0, 0, 0.7);
    border-radius: 15px;
    padding: 20px;
}

.title {
    font-size: 3em;
    text-align: center;
    margin-bottom: 20px;
    font-weight: bold;
    color: #FF0000;
}

.section-title {
    font-size: 2em;
    margin-top: 30px;
    margin-bottom: 20px;
    text-align: center;
    color: #FFD700;
}

.recommendation {
    border: 1px solid #FFD700;
    padding: 20px;
    margin-bottom: 20px;
    border-radius: 15px;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
    transition: transform 0.2s, box-shadow 0.2s;
    background-color: rgba(0, 0, 0, 0.8);
    overflow: hidden;
}

.recommendation:hover {
    transform: translateY(-10px);
    box-shadow: 0 8px 16px rgba(0, 0, 0, 0.5);
}

.recommendation img {
    width: 100%;
    height: 200px;
    object-fit: cover;
    border-radius: 10px;
    margin-bottom: 10px;
}

.movie-details-container {
    display: flex;
    align-items: center;
    margin-bottom: 20px;
}

.movie-details-container .movie-poster {
    flex: 0 0 auto;
    width: 30%;
    margin-right: 20px;
}

.movie-details-container .movie-poster img {
    width: 100%;
    border-radius: 10px;
}

.movie-details-container .movie-details {
    flex: 1 1 auto;
}

.movie-details-container .movie-details p {
    margin: 5px 0;
}

a {
    color: #FFD700;
    text-decoration: none;
}

a:hover {
    text-decoration: underline;
}

.stSidebar .element-container {
    background: rgba(0, 0, 0, 0.7);
    border-radius: 15px;
    padding: 15px;
}

.stSidebar .stButton button {
    background-color: #FFD700;
    color: #000;
    border: none;
    border-radius: 10px;
    padding: 10px;
    transition: background-color 0.2s, transform 0.2s;
}

.stSidebar .stButton button:hover {
    background-color: #FFAA00;
    transform: scale(1.05);
}
</style>
"""

st.markdown(_PAGE_CSS, unsafe_allow_html=True)
|
125 |
+
|
126 |
+
|
127 |
+
|
128 |
+
# Local paths of the CSV files read by load_data().
moviesCSV = "Data/movies.csv"
ratingsCSV = "Data/ratings.csv"
linksCSV = "Data/links.csv"

# Artefacts that are downloaded from Google Drive when they are missing
# locally: the item similarity matrix and the XGBoost feature database.
file_url = 'https://drive.google.com/uc?id=1-1bpusE96_Hh0rUxU7YmBo6RiwYLQGVy'
DataBaseCSV = "https://drive.google.com/uc?id=11Soimwc1uKS5VGy_QROifwkdIzl8MZaV"
output_path = 'Models/similarity_matrix.pkl'
output_path_DataBase = 'Data/XGBoost_database.csv'

# NOTE(review): this file is never downloaded here — presumably it ships with
# the app; confirm the name and its letter case on case-sensitive filesystems.
user_matrix_path = 'Models/User_based_matrix.pkl'


@st.cache_data
def download_model_from_google_drive(file_url, output_path):
    """Download ``file_url`` to ``output_path`` with gdown.

    Args:
        file_url: Google Drive download URL.
        output_path: Local path the file is written to.
    """
    # Make sure the target directory exists before gdown writes into it.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    gdown.download(file_url, output_path, quiet=False)


# Check each artefact on its own. Previously only the similarity matrix was
# tested, so a missing database CSV was never fetched once the matrix existed.
for _url, _path in (
    (file_url, output_path),
    (DataBaseCSV, output_path_DataBase),
):
    if not os.path.exists(_path):
        print(f"Downloading {_path} from Google Drive...")
        download_model_from_google_drive(_url, _path)
        print("Download completed......")
|
161 |
+
|
162 |
+
|
163 |
+
|
164 |
+
# Dummy data for user recommendations
|
165 |
+
# Placeholder recommendations keyed by user id; main() falls back to this
# mapping when the selected model matches neither known option.
user_recommendations = dict(
    [
        (1, ["Inception", "The Matrix", "Interstellar"]),
        (2, ["The Amazing Spider-Man", "District 9", "Titanic"]),
    ]
)
|
169 |
+
|
170 |
+
# Function to hash passwords
|
171 |
+
def hash_password(password):
    """Placeholder for password hashing.

    Not implemented: ``password`` is ignored and ``None`` is returned.
    """
    return None
|
173 |
+
|
174 |
+
# Dummy user database
|
175 |
+
# Placeholder credential table keyed by user id (values are plain text).
user_db = dict(
    [
        (1, "password123"),
        (2, "mypassword"),
    ]
)
|
179 |
+
|
180 |
+
# Login function
|
181 |
+
def login(username, password):
    """Tell whether ``username`` is an accepted user id.

    Args:
        username: Candidate user id; accepted only when it is an ``int``
            greater than 0 and less than 610.
        password: Unused; no password check is performed.

    Returns:
        ``True`` when the id is accepted, otherwise ``False``.
    """
    return isinstance(username, int) and 0 < username < 610
|
185 |
+
|
186 |
+
|
187 |
+
|
188 |
+
|
189 |
+
# Function to fetch movie details from OMDb API
|
190 |
+
# def fetch_movie_details(title, api_key="23f109b2"):
|
191 |
+
# url = f"http://www.omdbapi.com/?t={title}&apikey={api_key}"
|
192 |
+
# response = requests.get(url)
|
193 |
+
# return response.json()
|
194 |
+
|
195 |
+
# Display movie details
|
196 |
+
|
197 |
+
import re
|
198 |
+
|
199 |
+
def fetch_movie_details(title, api_key_omdb="23f109b2", api_key_tmdb="b8c96e534866701532768a313b978c8b"):
    """Look a movie up on OMDb, falling back to TMDb when OMDb has no match.

    Args:
        title: Movie title, optionally ending in a ``" (YYYY)"`` year suffix,
            which is removed before searching.
        api_key_omdb: OMDb API key.
        api_key_tmdb: TMDb API key.

    Returns:
        A dict whose ``'Response'`` is ``'True'`` on success — either the
        OMDb payload itself or a TMDb result mapped onto the keys ``Title``,
        ``Year``, ``Rated``, ``Genre``, ``Plot``, ``Poster``, ``imdbRating``
        and ``imdbID``. On failure, a dict with ``'Response': 'False'`` and
        an ``'Error'`` message.
    """
    # NOTE(review): the default API keys are committed in source; consider
    # reading them from configuration instead.
    request_timeout = 10  # seconds; avoids hanging the UI on a stalled request

    # Drop a trailing "(YYYY)" only when one is present; unconditionally
    # slicing off 7 characters truncated titles that carry no year.
    query = re.sub(r'\s*\(\d{4}\)\s*$', '', title).replace('+', '').strip()

    try:
        # Passing the query through ``params`` lets requests URL-encode it, so
        # titles containing '&', '#', '?' or spaces no longer corrupt the URL.
        movie = requests.get(
            "http://www.omdbapi.com/",
            params={'t': query, 'apikey': api_key_omdb},
            timeout=request_timeout,
        ).json()
        if movie.get('Response') == 'True':
            return movie

        # OMDb had no match: search TMDb and take its first hit.
        search_results = requests.get(
            "https://api.themoviedb.org/3/search/movie",
            params={'api_key': api_key_tmdb, 'query': query},
            timeout=request_timeout,
        ).json()
        # Error payloads carry no 'results' key, so do not index blindly.
        results = search_results.get('results') or []
        if not results:
            return {'Response': 'False', 'Error': 'Movie not found'}

        movie_id = results[0]['id']
        tmdb_movie = requests.get(
            f"https://api.themoviedb.org/3/movie/{movie_id}",
            params={'api_key': api_key_tmdb},
            timeout=request_timeout,
        ).json()
    except (requests.RequestException, ValueError) as exc:
        # Report only the exception type: the full message can embed the
        # request URL, which includes the API key.
        return {'Response': 'False', 'Error': f'Movie lookup failed ({type(exc).__name__})'}

    # Convert the TMDb response to the same structure as an OMDb response.
    release_date = tmdb_movie.get('release_date') or ''
    poster_path = tmdb_movie.get('poster_path')
    return {
        'Title': tmdb_movie.get('title', query),
        'Year': release_date.split('-')[0] or 'N/A',
        'Rated': 'N/A',  # TMDb doesn't provide rating info in the same way
        'Genre': ', '.join(genre['name'] for genre in tmdb_movie.get('genres') or []),
        'Plot': tmdb_movie.get('overview') or '',
        # The key can be present with a null value, so test the value itself.
        'Poster': f"https://image.tmdb.org/t/p/w500{poster_path}" if poster_path else '',
        'imdbRating': tmdb_movie.get('vote_average', 'N/A'),
        'imdbID': tmdb_movie.get('imdb_id') or '',
        'Response': 'True',
    }
|
236 |
+
|
237 |
+
def display_movie_details(movie):
    """Render an OMDb-shaped movie dict as an HTML card with ``st.markdown``.

    Args:
        movie: Dict with a ``'Response'`` key. When it equals ``'False'`` the
            ``'Error'`` text is written instead of a card. Otherwise the keys
            ``imdbRating``, ``imdbID``, ``Plot``, ``Poster``, ``Title``,
            ``Year``, ``Rated`` and ``Genre`` are read. The dict is not
            modified.
    """
    import html  # local import: only this function needs HTML escaping

    if movie['Response'] == 'False':
        st.write(f"Movie not found: {movie['Error']}")
        return

    def esc(value):
        # The values come from external APIs and are placed into markup that
        # is rendered with unsafe_allow_html, so escape them first.
        return html.escape(str(value), quote=True)

    # A missing rating counts as 0; compute it locally instead of writing the
    # substitute back into the caller's dict.
    raw_rating = movie['imdbRating']
    imdb_rating = 0.0 if raw_rating == 'N/A' else float(raw_rating)
    url = esc(f"https://www.imdb.com/title/{movie['imdbID']}/")

    # Split the plot on '.' or ',' and keep the first three fragments.
    plot_lines = re.split(r'[.,]', movie['Plot'])
    short_plot = esc('. '.join(plot_lines[:3]).strip() + '.')

    poster = esc(movie['Poster'])
    title = esc(movie['Title'])
    year = esc(movie['Year'])
    rated = esc(movie['Rated'])
    genre = esc(movie['Genre'].replace(',', ' |'))

    st.markdown(
        f"""
        <div style="
            background-color: #313131;
            border-radius: 20px;
            padding: 20px;
            margin: 25px 0;
            box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1);
        ">
            <div style="display: flex;">
                <div style="flex: 1;">
                    <BR>
                    <a href="{url}" target="_blank" >
                        <img src="{poster}" style="width: 100%; border-radius: 10px;" />
                    </a>
                </div>
                <div style="flex: 3; padding-left: 20px;">
                    <h3 style="margin: 0;" anchor="{url}">{title}</h3>
                    <p style="color: gray;">
                        <b>Year:</b> {year} Rated: {rated} <br>
                        <b>Genre:</b> {genre} <br>
                    </p>
                    <div>{short_plot}</div>
                    <div style="margin-top: 10px;">
                        <div style="background-color: #e0e0e0; border-radius: 5px; overflow: hidden;">
                            <div style="width: {imdb_rating * 10}%; background-color: #4caf50; padding: 5px 0; text-align: center; color: white;">
                                {imdb_rating}
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """, unsafe_allow_html=True
    )
|
285 |
+
|
286 |
+
|
287 |
+
|
288 |
+
|
289 |
+
|
290 |
+
|
291 |
+
def print_movie_details(movie):
    """Render a dataset movie record as a "recommendation" HTML card.

    Args:
        movie: Dict providing ``imdb_id`` (formatted as a zero-padded 7-digit
            integer), ``poster_url``, ``title``, ``genres``, ``num_ratings``,
            ``imdb_rating`` and ``avg_rating``.
    """
    # Values are computed in the order they appear in the markup.
    imdb_link = f"https://www.imdb.com/title/tt{movie['imdb_id']:07d}/"
    poster = movie['poster_url']
    # Everything before the last space-separated token is shown as the name;
    # the last token is shown as the year.
    name = ' '.join(movie['title'].split(" ")[:-1])
    year = movie['title'].split(" ")[-1]
    genre_text = ', '.join(movie['genres'])
    rating_count = movie['num_ratings']
    imdb_score = round(movie["imdb_rating"], 1)
    # avg_rating * 20 is used directly as the bar width in percent.
    bar_width = movie['avg_rating'] * 20
    avg_rating = movie['avg_rating']

    card = f"""
        <div class="recommendation">
            <div style="display: flex;">
                <div style="flex: 1;">
                    <a href="{imdb_link}" target="_blank">
                        <img src="{poster}" />
                    </a>
                </div>
                <div style="flex: 3; padding-left: 20px;">
                    <h4 style="margin: 0;">{name}</h4>
                    <p style="color: gray;">
                        <b>Year:</b> {year}<br>
                        <b>Genre:</b> {genre_text}<br>
                        <b>Number of Ratings:</b> {rating_count}<br>
                        <b>IMDb Rating: </b>{imdb_score}<br>
                    </p>
                    <div style="margin-top: 10px;">
                        <div style="background-color: #e0e0e0; border-radius: 5px; overflow: hidden;">
                            <div style="width: {bar_width}%; background-color: #4caf50; padding: 5px 0; text-align: center; color: white;">
                                {avg_rating}
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """
    st.markdown(card, unsafe_allow_html=True)
|
322 |
+
|
323 |
+
|
324 |
+
|
325 |
+
# Function to load data
|
326 |
+
@st.cache_data
def load_data():
    """Read the four CSV files used by the app.

    Returns:
        A tuple ``(movies_df, ratings_df, links_df, DataBase)`` of DataFrames
        read from ``moviesCSV``, ``ratingsCSV``, ``linksCSV`` and
        ``output_path_DataBase``, in that order.
    """
    csv_paths = (moviesCSV, ratingsCSV, linksCSV, output_path_DataBase)
    movies_df, ratings_df, links_df, DataBase = [pd.read_csv(path) for path in csv_paths]
    return movies_df, ratings_df, links_df, DataBase
|
333 |
+
|
334 |
+
# Function to load similarity matrix
|
335 |
+
@st.cache_data
def load_similarity_matrix(path):
    """Unpickle and return the object stored at ``path``.

    Args:
        path: Path of the pickle file to read.
    """
    # NOTE(review): unpickling can execute arbitrary code, so this should only
    # ever be pointed at files from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)
|
340 |
+
|
341 |
+
# Function to get movie details
|
342 |
+
def get_movie_details(movie_id, df_movies, df_ratings, df_links):
    """Collect dataset statistics and TMDb metadata for one movie.

    Args:
        movie_id: Value matched against the ``movieId`` column of all three
            frames.
        df_movies: DataFrame with ``movieId``, ``title`` and ``genres``.
        df_ratings: DataFrame with ``movieId`` and ``rating``.
        df_links: DataFrame with ``movieId``, ``imdbId`` and ``tmdbId``.

    Returns:
        A dict with the keys ``title``, ``genres``, ``avg_rating``,
        ``num_ratings``, ``imdb_id``, ``tmdb_id``, ``poster_url`` and
        ``imdb_rating`` (the latter is TMDb's ``vote_average``), or ``None``
        after reporting the failure with ``st.error``.
    """
    try:
        # Filter each frame once and reuse the result.
        link_rows = df_links[df_links['movieId'] == movie_id]
        imdb_id = link_rows['imdbId'].values[0]
        tmdb_id = link_rows['tmdbId'].values[0]

        movie_data = df_movies[df_movies['movieId'] == movie_id].iloc[0]
        genres = movie_data['genres'].split('|') if 'genres' in movie_data else []

        movie_ratings = df_ratings.loc[df_ratings['movieId'] == movie_id, 'rating']
        avg_rating = movie_ratings.mean()
        num_ratings = len(movie_ratings)

        api_key = 'b8c96e534866701532768a313b978c8b'
        tmdb_payload = {}
        # pandas stores an integer column with missing values as float, so the
        # id may arrive as 862.0 or NaN. Format it as an int for the URL and
        # skip the request when there is no id at all.
        if pd.notna(tmdb_id):
            response = requests.get(
                f'https://api.themoviedb.org/3/movie/{int(tmdb_id)}',
                params={'api_key': api_key},
                timeout=10,
            )
            if response.ok:
                # Parse the body once and read both fields from it.
                tmdb_payload = response.json()

        poster_url = tmdb_payload.get('poster_path', '')
        full_poster_url = f'https://image.tmdb.org/t/p/w500{poster_url}' if poster_url else ''
        imdb_rating = tmdb_payload.get('vote_average', 0)

        return {
            "title": movie_data['title'],
            "genres": genres,
            "avg_rating": round(avg_rating, 2),
            "num_ratings": num_ratings,
            "imdb_id": imdb_id,
            "tmdb_id": tmdb_id,
            "poster_url": full_poster_url,
            "imdb_rating": imdb_rating
        }
    except Exception as e:
        # UI boundary: surface the problem on the page and let the caller skip
        # this movie instead of aborting the whole run.
        st.error(f"Error fetching details for movie ID {movie_id}: {e}")
        return None
|
372 |
+
|
373 |
+
# Function to recommend movies
|
374 |
+
def recommend(movie, similarity_df, movies_df, ratings_df, links_df, k=5):
    """Return details of the ``k`` most similar movies with over 100 ratings.

    Args:
        movie: Title matched against ``movies_df['title']``.
        similarity_df: Object supporting ``.iloc[row]``; the row found for
            ``movie`` holds one similarity score per movie position.
        movies_df: DataFrame with ``title`` and ``movieId``. Its index label
            for ``movie`` is used as a row position in ``similarity_df``.
        ratings_df: DataFrame with a ``movieId`` column, one row per rating.
        links_df: Passed through to ``get_movie_details``.
        k: Number of recommendations wanted.

    Returns:
        A list of up to ``k`` dicts produced by ``get_movie_details``, most
        similar first; an empty list after an error has been reported with
        ``st.error``.
    """
    try:
        index = movies_df[movies_df['title'] == movie].index[0]

        distances = sorted(
            enumerate(similarity_df.iloc[index]),
            key=lambda pair: pair[1],
            reverse=True,
        )

        # Count the ratings per movie once instead of rescanning the whole
        # ratings frame for every candidate.
        rating_counts = ratings_df['movieId'].value_counts()

        recommended_movies = []
        for position, _score in distances:
            if len(recommended_movies) >= k:
                break
            # Skip the selected movie itself by position rather than assuming
            # it is the first entry after sorting.
            if position == index:
                continue

            movie_id = movies_df.iloc[position]['movieId']
            if rating_counts.get(movie_id, 0) > 100:
                movie_details = get_movie_details(movie_id, movies_df, ratings_df, links_df)
                if movie_details:
                    recommended_movies.append(movie_details)
        return recommended_movies
    except Exception as e:
        # UI boundary: report the failure on the page and return no results.
        st.error(f"Error generating recommendations: {e}")
        return []
|
395 |
+
|
396 |
+
# Main app
|
397 |
+
|
398 |
+
def _parse_user_id(raw_username):
    """Return ``raw_username`` as an int, or ``None`` if it is not a whole number."""
    try:
        return int(raw_username)
    except (TypeError, ValueError):
        return None


def _render_login_page(DB_df):
    """Draw the login page and, after a successful login, the user's recommendations."""
    st.title("Movie Recommendations")
    st.write("Welcome to the Movie Recommendation App!")
    # The range matches what login() accepts.
    st.write("Please login to get personalized movie recommendations. username between (1 and 609)")
    # model selection
    C = st.selectbox("Select the model", ["User Similarity Matrix", "XGBoost"])

    # Login form
    st.sidebar.header("Login")
    # Non-numeric or empty input becomes None, which login() rejects, instead
    # of crashing the page with a ValueError from int().
    username = _parse_user_id(st.sidebar.text_input("Username"))

    if not st.sidebar.button("Login"):
        return
    if not login(username, 'password'):
        st.sidebar.error("Invalid username: enter a number between 1 and 609")
        return

    st.sidebar.success("Login successful!")
    if C == "User Similarity Matrix":
        user_matrix = load_similarity_matrix(user_matrix_path)
        recommendations = get_user_recommendation(DB_df, user_matrix, username)
    elif C == "XGBoost":
        model = train_model(DB_df, username)
        recommendations, user_seen_movies = get_user_recommendation_XGBoost(DB_df, model, username)
    else:
        recommendations = user_recommendations.get(username, [])

    st.write(f"Recommendations for user number {username}:")
    num_cols = 2
    cols = st.columns(num_cols)
    for i, movie_title in enumerate(recommendations):
        movie = fetch_movie_details(movie_title)
        if movie['Response'] == 'True':
            # Alternate the cards between the two columns.
            with cols[i % num_cols]:
                display_movie_details(movie)
        else:
            st.write(f"Movie details for '{movie_title}' not found.")


def _render_similarity_page(movies_df, ratings_df, links_df, similarity_df):
    """Draw the movie picker, the chosen movie's details and its similar movies."""
    num_cols = 2
    cols = st.columns(num_cols)

    # Movie similarity search
    with cols[0]:
        st.title("Find Similar Movies")
        selected_movie = st.selectbox("Type or select a movie from the dropdown", movies_df['title'].unique())
        k = st.slider("Select the number of recommendations (k)", min_value=1, max_value=50, value=5)
        button = st.button("Find Similar Movies")

    with cols[1]:
        st.title("Choosen Movie Details:")
        if selected_movie:
            movie = fetch_movie_details(selected_movie)
            if movie['Response'] == 'True':
                display_movie_details(movie)
            else:
                st.write(f"Movie details for '{selected_movie}' not found.")

    if not button:
        return

    st.write("The rating bar here is token from our dataset and it's between 0 and 5.")
    if not selected_movie:
        st.write("Please select a movie.")
        return

    recommendations = recommend(selected_movie, similarity_df, movies_df, ratings_df, links_df, k)
    if not recommendations:
        st.write("No recommendations found.")
        return

    st.write(f"Similar movies to '{selected_movie}':")
    cols = st.columns(num_cols)
    for i, movie in enumerate(recommendations):
        with cols[i % num_cols]:
            print_movie_details(movie)


def main():
    """Load the data, draw the sidebar navigation and render the selected page."""
    movies_df, ratings_df, links_df, DB_df = load_data()
    print("Data loaded successfully")
    print("Loading similarity matrix...")
    similarity_df = load_similarity_matrix(output_path)

    st.sidebar.title("Navigation")
    menu = ["Login", "Movie Similarity"]
    choice = st.sidebar.selectbox("Select an option", menu)

    if choice == "Login":
        _render_login_page(DB_df)
    elif choice == "Movie Similarity":
        _render_similarity_page(movies_df, ratings_df, links_df, similarity_df)


if __name__ == "__main__":
    main()
|