alyalsayed
committed on
Commit
·
c31deea
1
Parent(s):
0c89da5
feat : upload project
Browse files- app.py +58 -0
- requirements.txt +8 -0
- src/.gitignore +2 -0
- utils.py +77 -0
app.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pickle
|
3 |
+
from utils import fetch_poster, recommend, improved_recommendations
|
4 |
+
|
5 |
+
st.header('Movie Recommender System Using Machine Learning')

# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load .pkl artifacts produced by this project, never untrusted uploads.

# Load movies_df
with open('src/movies_df.pkl', 'rb') as f:
    movies = pickle.load(f)

# Load cosine_sim
with open('src/cosine_sim.pkl', 'rb') as f:
    similarity = pickle.load(f)


def _render_poster_grid(names, posters, num_cols=5):
    """Render movie titles with their posters in rows of ``num_cols`` columns.

    Args:
        names: Sequence of movie titles to display.
        posters: Sequence of poster URLs aligned with ``names``; an entry may
            be ``None`` when the poster lookup failed.
        num_cols: Number of columns per row.

    The grid is driven by the actual number of items, so a short result list
    no longer raises IndexError the way a hard-coded count of 10 did.
    """
    for start in range(0, len(names), num_cols):
        # A fresh set of columns per row keeps each row aligned.
        col_list = st.columns(num_cols)
        row_items = zip(names[start:start + num_cols],
                        posters[start:start + num_cols])
        for col, (name, poster) in zip(col_list, row_items):
            with col:
                st.text(name)
                if poster:
                    st.image(poster)
                else:
                    # fetch_poster returns None on failure; show a placeholder
                    # instead of handing None to st.image.
                    st.text("Poster unavailable")


movie_list = movies['title'].values
selected_movie = st.selectbox(
    "Type or select a movie from the dropdown",
    movie_list
)

if st.button('Show Recommendation'):
    recommended_movie_names, recommended_movie_posters = recommend(
        selected_movie, movies, similarity
    )
    _render_poster_grid(recommended_movie_names, recommended_movie_posters)

if st.button('Show Improved Recommendations'):
    recommendations = improved_recommendations(selected_movie, movies, similarity)

    # improved_recommendations returns None when the title is not found.
    if recommendations is not None:
        recommended_movie_ids = recommendations['id'].tolist()
        recommended_movie_names = recommendations['title'].tolist()
        recommended_movie_posters = [
            fetch_poster(movie_id) for movie_id in recommended_movie_ids
        ]
        _render_poster_grid(recommended_movie_names, recommended_movie_posters)
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
requests
|
3 |
+
python-dotenv
|
4 |
+
pandas
|
5 |
+
numpy
|
6 |
+
matplotlib
|
7 |
+
# difflib is part of the Python standard library and must not be installed via pip
|
8 |
+
# pickle is part of the Python standard library and must not be installed via pip
|
src/.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
cosine_sim.pkl
|
2 |
+
movies_df.pkl
|
utils.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pickle
|
3 |
+
import requests
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
|
6 |
+
# Load environment variables from .env file
|
7 |
+
load_dotenv()
|
8 |
+
|
9 |
+
# Access the TMDB API key from the environment variables
|
10 |
+
TMDB_API_KEY = os.getenv("TMDB_API_KEY")
|
11 |
+
|
12 |
+
def fetch_poster(movie_id):
    """Return the TMDB poster URL for ``movie_id``, or ``None`` on failure.

    Queries the TMDB movie-details endpoint using the module-level
    ``TMDB_API_KEY`` and builds a w500 image URL from the ``poster_path``
    field of the JSON response.

    Args:
        movie_id: TMDB identifier of the movie.

    Returns:
        The poster URL as a string, or ``None`` when the response has no
        ``poster_path`` or the request failed. Failures are reported in the
        Streamlit UI instead of being raised.
    """
    # This module never imports streamlit at top level, so the st.* calls
    # below used to raise NameError; import it locally.
    import streamlit as st

    url = f"https://api.themoviedb.org/3/movie/{movie_id}"
    params = {"api_key": TMDB_API_KEY, "language": "en-US"}

    # The exception text from requests embeds the full request URL, including
    # the api_key query parameter, so it is deliberately NOT shown to the user.
    try:
        # Without a timeout a stalled connection would block the app forever
        # and the Timeout handler below could never run.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()  # Raises an HTTPError if the response was unsuccessful
        data = response.json()
    except requests.exceptions.HTTPError as errh:
        status = errh.response.status_code if errh.response is not None else "unknown"
        st.error(f"HTTP Error: status {status} for movie ID: {movie_id}")
        return None
    except requests.exceptions.ConnectionError:
        st.error(f"Error Connecting while fetching poster for movie ID: {movie_id}")
        return None
    except requests.exceptions.Timeout:
        st.error(f"Timeout Error while fetching poster for movie ID: {movie_id}")
        return None
    except requests.exceptions.RequestException as err:
        st.error(f"Something went wrong ({type(err).__name__}) for movie ID: {movie_id}")
        return None

    poster_path = data.get('poster_path')  # Use .get() to avoid KeyError if 'poster_path' is missing
    if not poster_path:
        st.warning(f"No poster found for movie ID: {movie_id}")
        return None
    return f"https://image.tmdb.org/t/p/w500/{poster_path}"
|
34 |
+
|
35 |
+
def recommend(movie, movies, similarity):
    """Return titles and poster URLs of the ten movies most similar to ``movie``.

    Args:
        movie: Title to look up in ``movies['title']``.
        movies: DataFrame providing at least the ``title`` and ``id`` columns.
        similarity: Indexable collection of similarity rows; the row chosen
            for ``movie`` is ranked from highest to lowest score.

    Returns:
        A ``(names, posters)`` tuple of parallel lists. Poster entries are
        whatever ``fetch_poster`` returns for each movie id.

    Raises:
        IndexError: If no row in ``movies`` has the given title.
    """
    # NOTE(review): the index *label* is used as a position into `similarity`;
    # presumably `movies` has a default 0..n-1 index - confirm.
    row_label = movies[movies['title'] == movie].index[0]

    ranked = list(enumerate(similarity[row_label]))
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    names = []
    posters = []
    # Entry 0 is skipped (presumably the queried movie itself); take the next ten.
    for position, _score in ranked[1:11]:
        entry = movies.iloc[position]
        posters.append(fetch_poster(entry.id))
        names.append(entry.title)

    return names, posters
|
47 |
+
|
48 |
+
def improved_recommendations(title, movies, similarity):
    """Recommend similar movies re-ranked by a vote-weighted rating.

    Takes the 25 entries most similar to ``title`` (skipping the top match),
    keeps those whose ``vote_count`` reaches the 60th percentile of that
    group, and orders them by the weighted rating
    ``v / (v + m) * R + m / (v + m) * C``, where ``v`` is the vote count,
    ``R`` the vote average, ``m`` the vote-count threshold and ``C`` the mean
    vote average of the candidate group.

    Args:
        title: Movie title to look up in ``movies['title']``.
        movies: DataFrame with ``id``, ``title``, ``vote_count``,
            ``vote_average`` and ``year`` columns.
        similarity: Indexable collection of similarity rows, aligned
            positionally with the rows of ``movies``.

    Returns:
        A DataFrame of at most 10 rows with the columns above plus ``wr``,
        sorted by ``wr`` descending, or ``None`` if the title is not found
        (a warning is shown in the Streamlit UI in that case).
    """
    # Use the row *position* rather than the index label: `similarity` and
    # the `.iloc` lookup below are positional, so a non-default index on
    # `movies` would otherwise select the wrong row.
    positions = (movies['title'] == title).to_numpy().nonzero()[0]
    if len(positions) == 0:
        # This module never imports streamlit at top level; import locally so
        # the warning does not raise NameError.
        import streamlit as st
        st.warning(f"No movie found with the title '{title}'.")
        return None

    idx = positions[0]
    sim_scores = sorted(enumerate(similarity[idx]), key=lambda x: x[1], reverse=True)
    # Skip the first entry (presumably the movie itself) and keep the next 25.
    movie_indices = [i for i, _ in sim_scores[1:26]]

    movies_qualified = movies.iloc[movie_indices][['id', 'title', 'vote_count', 'vote_average', 'year']]
    vote_counts = movies_qualified['vote_count'].dropna().astype('int')
    # NOTE(review): casting averages to int truncates ratings (7.9 -> 7);
    # kept as in the original - confirm this is intended.
    vote_averages = movies_qualified['vote_average'].dropna().astype('int')
    C = vote_averages.mean()
    m = vote_counts.quantile(0.60)

    # Comparing with >= already excludes rows whose vote_count is NaN.
    keep = (movies_qualified['vote_count'] >= m) & movies_qualified['vote_average'].notnull()
    # Copy so the column assignments below modify an independent frame rather
    # than a slice of `movies` (avoids chained-assignment warnings).
    qualified_movies = movies_qualified[keep].copy()
    qualified_movies['vote_count'] = qualified_movies['vote_count'].astype('int')
    qualified_movies['vote_average'] = qualified_movies['vote_average'].astype('int')

    # Vectorized weighted rating; also well-defined for an empty frame,
    # unlike a row-wise apply.
    v = qualified_movies['vote_count']
    R = qualified_movies['vote_average']
    qualified_movies['wr'] = (v / (v + m) * R) + (m / (m + v) * C)

    return qualified_movies.sort_values('wr', ascending=False).head(10)
|