import streamlit as st
import requests
import pandas as pd
import pickle
import gdown
import re
import os
from Helpers import get_user_recommendation , train_model , get_user_recommendation_XGBoost , seen_movies
# Configure the browser tab (title and icon) and select the wide page layout.
st.set_page_config(
    page_title="Movie Recommendation",
    page_icon="🎬",
    layout="wide",
)
# Inject the app-wide CSS theme. Raw <style> markup is only rendered by
# st.markdown when unsafe_allow_html=True is passed.
# NOTE(review): the st.markdown(...) wrapper, the <style> tags and the closing
# brace of every rule were absent from this span and are reconstructed here;
# the declarations themselves are unchanged. The body background-image URL is
# an empty string - confirm which image is intended.
st.markdown(
    """
<style>
body {
    background-image: url("");
    color: #FFFFFF;
    font-family: 'Arial', sans-serif;
}
.stApp {
    background: rgba(0, 0, 0, 0.7);
    border-radius: 15px;
    padding: 20px;
}
.title {
    font-size: 3em;
    text-align: center;
    margin-bottom: 20px;
    font-weight: bold;
    color: #FF0000;
}
.section-title {
    font-size: 2em;
    margin-top: 30px;
    margin-bottom: 20px;
    text-align: center;
    color: #FFD700;
}
.recommendation {
    border: 1px solid #FFD700;
    padding: 20px;
    margin-bottom: 20px;
    border-radius: 15px;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
    transition: transform 0.2s, box-shadow 0.2s;
    background-color: rgba(0, 0, 0, 0.8);
    overflow: hidden;
}
.recommendation:hover {
    transform: translateY(-10px);
    box-shadow: 0 8px 16px rgba(0, 0, 0, 0.5);
}
.recommendation img {
    width: 100%;
    height: 200px;
    object-fit: cover;
    border-radius: 10px;
    margin-bottom: 10px;
}
.movie-details-container {
    display: flex;
    align-items: center;
    margin-bottom: 20px;
}
.movie-details-container .movie-poster {
    flex: 0 0 auto;
    width: 30%;
    margin-right: 20px;
}
.movie-details-container .movie-poster img {
    width: 100%;
    border-radius: 10px;
}
.movie-details-container .movie-details {
    flex: 1 1 auto;
}
.movie-details-container .movie-details p {
    margin: 5px 0;
}
a {
    color: #FFD700;
    text-decoration: none;
}
a:hover {
    text-decoration: underline;
}
.stSidebar .element-container {
    background: rgba(0, 0, 0, 0.7);
    border-radius: 15px;
    padding: 15px;
}
.stSidebar .stButton button {
    background-color: #FFD700;
    color: #000;
    border: none;
    border-radius: 10px;
    padding: 10px;
    transition: background-color 0.2s, transform 0.2s;
}
.stSidebar .stButton button:hover {
    background-color: #FFAA00;
    transform: scale(1.05);
}
</style>
""",
    unsafe_allow_html=True,
)
# Local paths of the CSV data files read by load_data().
# NOTE(review): these values are local file paths, not GitHub raw URLs.
moviesCSV = "Data/movies.csv"
ratingsCSV = "Data/ratings.csv"
linksCSV = "Data/links.csv"
# Settings for the artifacts that are downloaded when they do not exist locally:
# file_url / DataBaseCSV are the download sources, output_path /
# output_path_DataBase the local destinations.
# NOTE(review): both source URLs are empty strings here - confirm they are
# filled in before the download step runs.
file_url = ''
DataBaseCSV = ""
output_path = 'Models/similarity_matrix.pkl'
output_path_DataBase = 'Data/XGBoost_database.csv'
# Pickled matrix loaded by main() for the "User Similarity Matrix" model.
user_matrix_path = 'Models/User_based_matrix.pkl'
def download_model_from_google_drive(file_url, output_path):
    """Download ``file_url`` and store it at ``output_path`` using gdown.

    Args:
        file_url: Source URL of the file to download.
        output_path: Local path the downloaded file is written to.
    """
    # NOTE(review): the function body was fused into the def line, leaving only
    # the argument tail ", output_path, quiet=False)". The call below is
    # reconstructed from that tail and the file's otherwise unused `gdown`
    # import - confirm it matches the intended call.
    gdown.download(file_url, output_path, quiet=False)
# Download each required artifact that is not present locally yet.
# Every file is checked on its own so that a missing database CSV is still
# fetched when the similarity matrix already exists (and vice versa).
_download_targets = [
    (file_url, output_path),
    (DataBaseCSV, output_path_DataBase),
]
_missing_targets = [
    (url, path) for url, path in _download_targets if not os.path.exists(path)
]
if _missing_targets:
    print("Downloading the similarity matrix from Googlr Drive...")
    # change file permission
    # os.chmod('Models/', 0o777)
    for _url, _path in _missing_targets:
        download_model_from_google_drive(_url, _path)
    print("Download completed......")
def display_user_history(history):
    """Show the user's watch history inside a fixed-height container.

    Args:
        history: Iterable of entries to list (main() passes the array of
            titles returned by get_user_history()).
    """
    st.write("Your Watch History:")
    container = st.container(height=300)
    with container:
        for movie in history:
            # NOTE(review): the statement that rendered each entry was missing
            # (the `with container:` block had no body); st.write is assumed.
            st.write(movie)
def get_user_history(dataBase, user_id):
    """Return the ``title`` values of all rows belonging to ``user_id``.

    Args:
        dataBase: DataFrame with at least ``userId`` and ``title`` columns.
        user_id: Value matched against the ``userId`` column.

    Returns:
        numpy.ndarray: Titles of the matching rows in frame order; empty when
        no row matches.
    """
    # One .loc call selects rows and column together instead of chaining two
    # indexing operations; to_numpy() is the documented replacement for .values.
    return dataBase.loc[dataBase['userId'] == user_id, 'title'].to_numpy()
# Function to hash passwords
def hash_password(password):
    """Return the hexadecimal SHA-256 digest of ``password``.

    NOTE(review): the original body was missing; a plain SHA-256 hex digest is
    assumed. An unsalted fast hash is not suitable for storing real user
    passwords - confirm the intended scheme before relying on this.

    Args:
        password: Password text to hash.

    Returns:
        str: 64-character lowercase hexadecimal digest.
    """
    import hashlib  # local import: hashlib is not among the file-level imports

    return hashlib.sha256(password.encode("utf-8")).hexdigest()
# Login function
def login(username, password=None, max_user_id=609):
    """Return True when ``username`` is a valid integer user id.

    Args:
        username: Candidate user id. Only ``int`` values are accepted.
        password: Unused; kept so existing callers keep working.
        max_user_id: Largest accepted id (inclusive). Defaults to 609, the
            bound that was previously hard-coded.

    Returns:
        bool: True if ``1 <= username <= max_user_id``, otherwise False.
    """
    # bool is a subclass of int, so True would otherwise log in as user 1.
    if isinstance(username, bool) or not isinstance(username, int):
        return False
    return 1 <= username <= max_user_id
def fetch_movie_details(title, api_key_omdb="23f109b2", api_key_tmdb="b8c96e534866701532768a313b978c8b"):
    """Look up a movie by title on OMDb, falling back to TMDb.

    Args:
        title: Title whose last 7 characters are dropped before the lookup
            (callers pass titles ending in a " (YYYY)" suffix).
        api_key_omdb: OMDb API key.
        api_key_tmdb: TMDb API key.

    Returns:
        dict: The OMDb response when OMDb finds the movie; otherwise a dict
        with the same keys built from TMDb data ('Title', 'Year', 'Rated',
        'Genre', 'Plot', 'Poster', 'imdbRating', 'imdbID', 'Response');
        otherwise ``{'Response': 'False', 'Error': ...}``.
    """
    # NOTE(review): API keys are hard-coded as parameter defaults; consider
    # loading them from configuration/secrets instead of source control.
    # NOTE(review): the request URLs had lost their scheme and host (requests
    # rejects such URLs). The endpoints below are the public OMDb / TMDb v3
    # ones that match the surviving query strings - confirm against the docs.
    title = title[:-7]  # strip the trailing " (YYYY)"
    title = title.replace('+', '')
    timeout = 10  # seconds; prevents a stalled API from hanging the page

    try:
        # First, try the OMDb API. `params` lets requests URL-encode the title.
        response_omdb = requests.get(
            "https://www.omdbapi.com/",
            params={"t": title, "apikey": api_key_omdb},
            timeout=timeout,
        )
        movie = response_omdb.json()
        if movie.get('Response') == 'True':
            return movie

        # If OMDb doesn't find the movie, try the TMDb API.
        response_tmdb_search = requests.get(
            "https://api.themoviedb.org/3/search/movie",
            params={"api_key": api_key_tmdb, "query": title},
            timeout=timeout,
        )
        results = response_tmdb_search.json().get('results') or []
        if results:
            movie_id = results[0]['id']
            response_tmdb_movie = requests.get(
                f"https://api.themoviedb.org/3/movie/{movie_id}",
                params={"api_key": api_key_tmdb},
                timeout=timeout,
            )
            tmdb_movie = response_tmdb_movie.json()

            # These fields can be present but empty/null, so test the value
            # rather than mere key membership.
            release_date = tmdb_movie.get('release_date') or ''
            poster_path = tmdb_movie.get('poster_path') or ''

            # Convert TMDb response to a similar structure as OMDb response
            return {
                'Title': tmdb_movie['title'],
                'Year': release_date.split('-')[0] if release_date else 'N/A',
                'Rated': 'N/A',  # TMDb doesn't provide rating info in the same way
                'Genre': ', '.join(genre['name'] for genre in tmdb_movie.get('genres') or []),
                'Plot': tmdb_movie.get('overview') or '',
                'Poster': f"https://image.tmdb.org/t/p/w500{poster_path}" if poster_path else '',
                'imdbRating': tmdb_movie.get('vote_average', 0),
                'imdbID': tmdb_movie.get('imdb_id') or '',
                'Response': 'True',
            }
    except (requests.RequestException, ValueError, KeyError) as exc:
        # Network failure, non-JSON body or unexpected payload: report it in
        # the same shape as "not found" so the UI can show a message.
        return {'Response': 'False', 'Error': f"Movie lookup failed: {exc}"}

    return {'Response': 'False', 'Error': 'Movie not found'}
def display_movie_details(movie):
    """Render an OMDb-style movie dict as an HTML card with st.markdown.

    Args:
        movie: Dict with a 'Response' key. When it is 'False' the 'Error'
            text is shown; otherwise 'imdbRating', 'imdbID', 'Plot', 'Poster',
            'Title', 'Year', 'Rated' and 'Genre' are read. The dict is not
            modified.
    """
    import html  # local import: html is not among the file-level imports

    if movie['Response'] == 'False':
        st.write(f"Movie not found: {movie['Error']}")
        # Stop here: an error dict has none of the detail keys used below.
        return

    raw_rating = movie['imdbRating']
    try:
        imdb_rating = 0.0 if raw_rating == 'N/A' else float(raw_rating)
    except (TypeError, ValueError):
        imdb_rating = 0.0
    # The rating is on a 0-10 scale; clamp so the bar width stays a valid
    # percentage.
    bar_width = min(max(imdb_rating * 10, 0), 100)

    # NOTE(review): the URL prefix was missing; the standard IMDb title URL is
    # assumed.
    url = f"https://www.imdb.com/title/{movie['imdbID']}/"

    # Split the plot into lines based on . or ,
    plot_lines = re.split(r'[.,]', movie['Plot'])
    short_plot = '. '.join(plot_lines[:3]).strip() + '.'

    # The values come from external APIs and are rendered with
    # unsafe_allow_html=True, so escape them before interpolation.
    safe_url = html.escape(url)
    safe_poster = html.escape(str(movie['Poster']))
    safe_title = html.escape(str(movie['Title']))
    safe_year = html.escape(str(movie['Year']))
    safe_rated = html.escape(str(movie['Rated']))
    safe_genre = html.escape(movie['Genre'].replace(',', ' |'))
    safe_plot = html.escape(short_plot)

    # NOTE(review): the st.markdown( opener, the closing tags and the lines
    # showing the plot and the text inside the rating bar were missing; the
    # "Plot" row and the "IMDb Rating: x/10" label are assumptions.
    # No blank lines inside the markup: a blank line would end the HTML block
    # in Markdown.
    st.markdown(
        f"""
<div style="
background-color: #313131;
border-radius: 20px;
padding: 20px;
margin: 25px 0;
box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1);
">
<div style="display: flex;">
<div style="flex: 1;">
<a href="{safe_url}" target="_blank" >
<img src="{safe_poster}" style="width: 100%; border-radius: 10px;" />
</a>
</div>
<div style="flex: 3; padding-left: 20px;">
<h3 style="margin: 0;" anchor="{safe_url}">{safe_title}</h3>
<p style="color: gray;">
<b>Year:</b> {safe_year} Rated: {safe_rated} <br>
<b>Genre:</b> {safe_genre} <br>
<b>Plot:</b> {safe_plot} <br>
</p>
<div style="margin-top: 10px;">
<div style="background-color: #e0e0e0; border-radius: 5px; overflow: hidden;">
<div style="width: {bar_width}%; background-color: #4caf50; padding: 5px 0; text-align: center; color: white;">
IMDb Rating: {imdb_rating}/10
</div>
</div>
</div>
</div>
</div>
</div>
""",
        unsafe_allow_html=True,
    )
def print_movie_details(movie):
    """Render a dataset movie dict as a ``.recommendation`` HTML card.

    Args:
        movie: Dict as returned by get_movie_details(); the keys 'title',
            'genres', 'num_ratings', 'imdb_rating', 'avg_rating', 'imdb_id'
            and 'poster_url' are read. The last space-separated token of
            'title' is shown as the year, the rest as the name.
    """
    import html  # local import: html is not among the file-level imports

    title_tokens = movie['title'].split(" ")
    safe_name = html.escape(' '.join(title_tokens[:-1]))
    safe_year = html.escape(title_tokens[-1])
    safe_genres = html.escape(', '.join(movie['genres']))
    safe_poster = html.escape(str(movie['poster_url']))
    avg_rating = movie['avg_rating']

    # NOTE(review): the URL prefix was missing; the standard IMDb title URL
    # ("tt" + zero-padded 7-digit id) is assumed. int() guards against the id
    # arriving as a float.
    imdb_url = f"https://www.imdb.com/title/tt{int(movie['imdb_id']):07d}/"

    # NOTE(review): the st.markdown( opener, the closing tags and the text
    # inside the rating bar were missing; the "x/5" label is an assumption
    # (the bar width is avg_rating * 20, i.e. a 0-5 scale).
    # No blank lines inside the markup: a blank line would end the HTML block
    # in Markdown.
    st.markdown(
        f"""
<div class="recommendation">
<div style="display: flex;">
<div style="flex: 1;">
<a href="{imdb_url}" target="_blank">
<img src="{safe_poster}" />
</a>
</div>
<div style="flex: 3; padding-left: 20px;">
<h4 style="margin: 0;">{safe_name}</h4>
<p style="color: gray;">
<b>Year:</b> {safe_year}<br>
<b>Genre:</b> {safe_genres}<br>
<b>Number of Ratings:</b> {movie['num_ratings']}<br>
<b>IMDb Rating: </b>{round(movie["imdb_rating"],1)}<br>
</p>
<div style="margin-top: 10px;">
<div style="background-color: #e0e0e0; border-radius: 5px; overflow: hidden;">
<div style="width: {avg_rating * 20}%; background-color: #4caf50; padding: 5px 0; text-align: center; color: white;">
{avg_rating}/5
</div>
</div>
</div>
</div>
</div>
</div>
""",
        unsafe_allow_html=True,
    )
# Function to load data
def load_data():
    """Read the four CSV inputs into DataFrames.

    Returns:
        tuple: ``(movies_df, ratings_df, links_df, DataBase)`` read from
        ``moviesCSV``, ``ratingsCSV``, ``linksCSV`` and
        ``output_path_DataBase``, in that order.
    """
    csv_sources = (moviesCSV, ratingsCSV, linksCSV, output_path_DataBase)
    # Unpacking consumes the map eagerly, so files are read in the order above.
    movies_df, ratings_df, links_df, DataBase = map(pd.read_csv, csv_sources)
    return movies_df, ratings_df, links_df, DataBase
# Function to load similarity matrix
def load_similarity_matrix(path):
    """Unpickle and return the object stored at ``path``.

    Args:
        path: Path of a pickle file.

    Returns:
        The unpickled object (callers use it as a similarity matrix).

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the content is not a valid pickle.
    """
    # SECURITY NOTE: pickle.load can execute arbitrary code while loading.
    # One of the files loaded through this function is downloaded at startup,
    # so only use sources you trust.
    with open(path, 'rb') as f:
        return pickle.load(f)
# Function to get movie details
def get_movie_details(movie_id, df_movies, df_ratings, df_links):
    """Collect dataset statistics and TMDb metadata for one movie.

    Args:
        movie_id: Value matched against the ``movieId`` column of all frames.
        df_movies: Frame with ``movieId``, ``title`` and (optionally)
            ``genres`` columns.
        df_ratings: Frame with ``movieId`` and ``rating`` columns.
        df_links: Frame with ``movieId``, ``imdbId`` and ``tmdbId`` columns.

    Returns:
        dict | None: Dict with keys 'title', 'genres', 'avg_rating',
        'num_ratings', 'imdb_id', 'tmdb_id', 'poster_url' and 'imdb_rating',
        or None after reporting the error with st.error when anything fails.
    """
    # NOTE(review): the `try:` line and the dict's closing brace were missing
    # and the request/poster URLs had lost their scheme and host; the TMDb v3
    # endpoints below are assumed - confirm against the TMDb documentation.
    try:
        # Select the column before taking the scalar so each id keeps its own
        # dtype (taking a whole row would upcast mixed int/float columns).
        links = df_links[df_links['movieId'] == movie_id]
        imdb_id = links['imdbId'].values[0]
        tmdb_id = links['tmdbId'].values[0]

        movie_data = df_movies[df_movies['movieId'] == movie_id].iloc[0]
        genres = movie_data['genres'].split('|') if 'genres' in movie_data else []

        movie_ratings = df_ratings[df_ratings['movieId'] == movie_id]
        avg_rating = movie_ratings['rating'].mean()
        num_ratings = movie_ratings.shape[0]

        # NOTE(review): hard-coded API key; consider moving it to configuration.
        api_key = 'b8c96e534866701532768a313b978c8b'
        poster_url = ''
        imdb_rating = 0
        if pd.notna(tmdb_id):
            # pandas reads an integer column containing missing values as
            # float; cast so the URL holds "862" rather than "862.0".
            tmdb_id = int(tmdb_id)
            response = requests.get(
                f'https://api.themoviedb.org/3/movie/{tmdb_id}',
                params={'api_key': api_key},
                timeout=10,
            )
            payload = response.json()  # parse the body once
            poster_url = payload.get('poster_path', '')
            # Stored under 'imdb_rating' although the value is TMDb's
            # vote_average.
            imdb_rating = payload.get('vote_average', 0)
        full_poster_url = f'https://image.tmdb.org/t/p/w500{poster_url}' if poster_url else ''

        return {
            "title": movie_data['title'],
            "genres": genres,
            "avg_rating": round(avg_rating, 2),
            "num_ratings": num_ratings,
            "imdb_id": imdb_id,
            "tmdb_id": tmdb_id,
            "poster_url": full_poster_url,
            "imdb_rating": imdb_rating,
        }
    except Exception as e:
        # UI boundary: report the failure and let the caller skip this movie.
        st.error(f"Error fetching details for movie ID {movie_id}: {e}")
        return None
# Function to recommend movies
def recommend(movie, similarity_df, movies_df, ratings_df, links_df, k=5, min_ratings=100):
    """Return details for up to ``k`` movies most similar to ``movie``.

    Args:
        movie: Title to look up in ``movies_df['title']``.
        similarity_df: Frame whose row ``i`` holds the similarity scores of
            the movie at position ``i`` of ``movies_df``.
        movies_df: Frame with ``movieId`` and ``title`` columns.
        ratings_df: Frame with a ``movieId`` column (one row per rating).
        links_df: Passed through to get_movie_details().
        k: Maximum number of recommendations to return.
        min_ratings: A candidate is kept only when it has more than this many
            ratings. Defaults to 100, the previously hard-coded threshold.

    Returns:
        list[dict]: Movie detail dicts from get_movie_details(), most similar
        first; an empty list after reporting the error with st.error when
        anything fails.
    """
    # NOTE(review): the `try:` line and the append statement were missing and
    # are reconstructed here.
    try:
        # The index label is used as a row position below, which assumes
        # movies_df has its default 0..n-1 index - TODO confirm.
        index = movies_df[movies_df['title'] == movie].index[0]
        distances = sorted(enumerate(similarity_df.iloc[index]), reverse=True, key=lambda x: x[1])

        # Count ratings per movie once instead of scanning ratings_df for
        # every candidate.
        rating_counts = ratings_df['movieId'].value_counts()

        recommended_movies = []
        # distances[0] is skipped: the top-scoring entry is expected to be
        # the selected movie itself.
        for position, _score in distances[1:]:
            if len(recommended_movies) >= k:
                break
            movie_id = movies_df.iloc[position]['movieId']
            if rating_counts.get(movie_id, 0) > min_ratings:
                movie_details = get_movie_details(movie_id, movies_df, ratings_df, links_df)
                if movie_details:
                    recommended_movies.append(movie_details)
        return recommended_movies
    except Exception as e:
        # UI boundary: report the failure and show no recommendations.
        st.error(f"Error generating recommendations: {e}")
        return []
# Main app
def main():
    """Run the Streamlit app: a login page and a movie-similarity page.

    The "Login" page takes a numeric user id, shows that user's watch history
    and renders recommendations from the selected model. The "Movie
    Similarity" page shows details of a selected movie and, on request, the
    k most similar movies.
    """
    # NOTE(review): the `else:` branches and the calls that render the history
    # and the movie cards were missing; they are reconstructed from the
    # surrounding messages and the helper functions defined in this file.
    movies_df, ratings_df, links_df, DB_df = load_data()
    print("Data loaded successfully")

    menu = ["Login", "Movie Similarity"]
    choice = st.sidebar.selectbox("Select an option", menu)

    if choice == "Login":
        num_cols = 2
        cols = st.columns(num_cols)
        with cols[0]:
            st.title("Movie Recommendations")
            st.write("Welcome to the Movie Recommendation App!")
            st.write("Please login to get personalized movie recommendations. username between (1 and 609)")
            # model selection
            C = st.selectbox("Select the model", ["User Similarity Matrix", "XGBoost"])

        # Login form
        username = st.sidebar.text_input("Username")
        if username:
            try:
                username = int(username)
            except ValueError:
                # Non-numeric input: leave a value login() rejects instead of
                # crashing the page.
                username = None

        if st.sidebar.button("Login"):
            if login(username):
                st.sidebar.success("Login successful!")
                # Fetch user history
                with cols[1]:
                    user_history = get_user_history(DB_df, username)
                    display_user_history(user_history)

                if C == "User Similarity Matrix":
                    user_matrix = load_similarity_matrix(user_matrix_path)
                    recommendations = get_user_recommendation(DB_df, user_matrix, username)
                elif C == "XGBoost":
                    model = train_model(DB_df, username)
                    recommendations, _ = get_user_recommendation_XGBoost(DB_df, model, username)

                st.write(f"Recommendations for user number {username}:")
                num_cols = 2
                cols = st.columns(num_cols)
                for i, movie_title in enumerate(recommendations):
                    movie = fetch_movie_details(movie_title)
                    if movie['Response'] == 'True':
                        with cols[i % num_cols]:
                            display_movie_details(movie)
                    else:
                        st.write(f"Movie details for '{movie_title}' not found.")
            else:
                st.sidebar.error("Invalid email or password")

    elif choice == "Movie Similarity":
        # Only this page uses the movie similarity matrix, so load it here
        # rather than on every run of the login page.
        print("Loading similarity matrix...")
        similarity_df = load_similarity_matrix(output_path)

        num_cols = 2
        cols = st.columns(num_cols)
        # Movie similarity search
        with cols[0]:
            st.title("Find Similar Movies")
            selected_movie = st.selectbox("Type or select a movie from the dropdown", movies_df['title'].unique())
            k = st.slider("Select the number of recommendations (k)", min_value=1, max_value=50, value=5)
            button = st.button("Find Similar Movies")

        with cols[1]:
            st.title("Choosen Movie Details:")
            if selected_movie:
                movie = fetch_movie_details(selected_movie)
                if movie['Response'] == 'True':
                    display_movie_details(movie)
                else:
                    st.write(f"Movie details for '{selected_movie}' not found.")

        if button:
            st.write("The rating bar here is token from our dataset and it's between 0 and 5.")
            if selected_movie:
                recommendations = recommend(selected_movie, similarity_df, movies_df, ratings_df, links_df, k)
                if recommendations:
                    st.write(f"Similar movies to '{selected_movie}':")
                    num_cols = 2
                    cols = st.columns(num_cols)
                    for i, movie in enumerate(recommendations):
                        with cols[i % num_cols]:
                            print_movie_details(movie)
                else:
                    st.write("No recommendations found.")
            else:
                st.write("Please select a movie.")
# Script entry point: the guard had no body, so main() was never called.
if __name__ == "__main__":
    main()