alyalsayed committed on
Commit
c31deea
·
1 Parent(s): 0c89da5

feat : upload project

Browse files
Files changed (4) hide show
  1. app.py +58 -0
  2. requirements.txt +8 -0
  3. src/.gitignore +2 -0
  4. utils.py +77 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ from utils import fetch_poster, recommend, improved_recommendations
4
+
5
st.header('Movie Recommender System Using Machine Learning')


@st.cache_resource
def load_artifact(path):
    """Unpickle and return the object stored at ``path``.

    Streamlit re-executes this script on every widget interaction; caching
    keeps the data frame and the similarity matrix in memory instead of
    re-reading them from disk each time.
    """
    # NOTE(security): pickle can execute arbitrary code while loading. Only
    # point this at artefacts produced by this project, never at uploads.
    with open(path, 'rb') as f:
        return pickle.load(f)


def show_grid(names, posters, num_cols=5):
    """Render titles with their posters in rows of ``num_cols`` columns.

    Iterates over the items actually returned, so a short result list cannot
    cause an out-of-range lookup, and skips ``st.image`` for posters that
    could not be fetched (``fetch_poster`` returns ``None`` on failure).
    """
    for start in range(0, len(names), num_cols):
        row_names = names[start:start + num_cols]
        row_posters = posters[start:start + num_cols]
        for column, name, poster in zip(st.columns(num_cols), row_names, row_posters):
            with column:
                st.text(name)
                if poster:
                    st.image(poster)
                else:
                    st.caption("Poster not available")


try:
    # Load movies_df
    movies = load_artifact('src/movies_df.pkl')
    # Load cosine_sim
    similarity = load_artifact('src/cosine_sim.pkl')
except FileNotFoundError as err:
    # Show a readable message instead of a traceback, then halt this run.
    st.error(f"Required data file is missing: {err.filename}")
    st.stop()

movie_list = movies['title'].values
selected_movie = st.selectbox(
    "Type or select a movie from the dropdown",
    movie_list
)

if st.button('Show Recommendation'):
    recommended_movie_names, recommended_movie_posters = recommend(selected_movie, movies, similarity)
    show_grid(recommended_movie_names, recommended_movie_posters)

if st.button('Show Improved Recommendations'):
    recommendations = improved_recommendations(selected_movie, movies, similarity)

    # None means the title was not found; nothing to render in that case.
    if recommendations is not None:
        recommended_movie_names = recommendations['title'].tolist()
        recommended_movie_posters = [
            fetch_poster(movie_id) for movie_id in recommendations['id'].tolist()
        ]
        show_grid(recommended_movie_names, recommended_movie_posters)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
streamlit
requests
python-dotenv
pandas
numpy
matplotlib
# difflib and pickle ship with the Python standard library and must not be
# listed here: pip cannot resolve them, so the whole install would fail.
src/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ cosine_sim.pkl
2
+ movies_df.pkl
utils.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ import requests
4
+ from dotenv import load_dotenv
5
+
6
+ # Load environment variables from .env file
7
+ load_dotenv()
8
+
9
+ # Access the TMDB API key from the environment variables
10
+ TMDB_API_KEY = os.getenv("TMDB_API_KEY")
11
+
12
def fetch_poster(movie_id, timeout=10):
    """Return the TMDB poster URL for ``movie_id``, or ``None`` on failure.

    Args:
        movie_id: TMDB identifier of the movie to look up.
        timeout: Seconds to wait for the TMDB API before giving up.

    Returns:
        The full ``image.tmdb.org`` URL of the poster, or ``None`` when the
        API key is not configured, the request fails, the response is not
        valid JSON, or the movie has no poster. Each of those cases is
        reported to the user through Streamlit.
    """
    # Imported here because this module has no top-level Streamlit import,
    # yet every failure path below reports through it.
    import streamlit as st

    if not TMDB_API_KEY:
        st.error("TMDB_API_KEY is not set; cannot fetch posters.")
        return None

    def _redact(error):
        """Return the error text with the API key masked out."""
        # requests embeds the full request URL (including ?api_key=...) in
        # its exception messages; never echo the key back to the UI.
        return str(error).replace(TMDB_API_KEY, "***")

    url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={TMDB_API_KEY}&language=en-US"
    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError if the response was unsuccessful
        data = response.json()
    except requests.exceptions.HTTPError as errh:
        st.error(f"HTTP Error: {_redact(errh)}")
        return None
    except requests.exceptions.ConnectionError as errc:
        st.error(f"Error Connecting: {_redact(errc)}")
        return None
    except requests.exceptions.Timeout as errt:
        st.error(f"Timeout Error: {_redact(errt)}")
        return None
    except requests.exceptions.RequestException as err:
        st.error(f"Something went wrong: {_redact(err)}")
        return None
    except ValueError as errv:
        # Older requests releases raise a plain ValueError for a non-JSON body.
        st.error(f"Something went wrong: {_redact(errv)}")
        return None

    poster_path = data.get('poster_path')  # Use .get() to avoid KeyError if 'poster_path' is missing
    if not poster_path:
        st.warning(f"No poster found for movie ID: {movie_id}")
        return None

    # TMDB poster paths already start with "/"; strip it to avoid "w500//...".
    return f"https://image.tmdb.org/t/p/w500/{poster_path.lstrip('/')}"
34
+
35
def recommend(movie, movies, similarity, top_n=10):
    """Return the titles and poster URLs of the movies most similar to ``movie``.

    Args:
        movie: Title to look up in ``movies['title']``.
        movies: DataFrame with at least ``id`` and ``title`` columns.
        similarity: Matrix-like object whose row ``i`` holds the similarity
            of every movie to the movie at row position ``i`` of ``movies``.
        top_n: Maximum number of recommendations to return.

    Returns:
        A ``(names, posters)`` tuple of equally long lists. A poster entry is
        ``None`` when ``fetch_poster`` could not resolve it.

    Raises:
        IndexError: If no row of ``movies`` has the requested title.
    """
    # Locate the row by position, not by index label: the neighbours below
    # are resolved with ``iloc``, so the similarity row must be chosen the
    # same way to stay correct when the frame has a non-default index.
    matches = (movies['title'] == movie).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"No movie found with the title '{movie}'.")
    position = matches[0]

    ranked = sorted(enumerate(similarity[position]), key=lambda pair: pair[1], reverse=True)
    # Drop the selected movie explicitly instead of assuming it sorts first;
    # a tie with an identical movie could otherwise recommend the movie itself.
    neighbours = [i for i, _score in ranked if i != position][:max(top_n, 0)]

    recommended_movie_names = []
    recommended_movie_posters = []
    for neighbour in neighbours:
        row = movies.iloc[neighbour]
        # fetch the movie poster
        recommended_movie_posters.append(fetch_poster(row['id']))
        recommended_movie_names.append(row['title'])

    return recommended_movie_names, recommended_movie_posters
47
+
48
def improved_recommendations(title, movies, similarity, top_n=10):
    """Return similar movies re-ranked by a vote-weighted rating.

    The 25 movies most similar to ``title`` form the candidate pool. Those
    whose ``vote_count`` reaches the pool's 60th percentile are scored with
    ``wr = v / (v + m) * R + m / (v + m) * C`` where ``v`` is the vote count,
    ``R`` the vote average, ``m`` the percentile threshold and ``C`` the mean
    vote average of the pool.

    Args:
        title: Title to look up in ``movies['title']``.
        movies: DataFrame with ``id``, ``title``, ``vote_count``,
            ``vote_average`` and ``year`` columns.
        similarity: Matrix-like object whose row ``i`` holds the similarity
            of every movie to the movie at row position ``i`` of ``movies``.
        top_n: Maximum number of rows to return.

    Returns:
        A DataFrame with the columns above plus ``wr``, sorted by ``wr`` in
        descending order, or ``None`` (after a Streamlit warning) when the
        title is unknown.
    """
    # Positional lookup, consistent with the ``iloc`` selection further down.
    matches = (movies['title'] == title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        # Imported here because this module has no top-level Streamlit import.
        import streamlit as st
        st.warning(f"No movie found with the title '{title}'.")
        return None
    position = matches[0]

    candidate_pool = 25
    vote_quantile = 0.60

    ranked = sorted(enumerate(similarity[position]), key=lambda pair: pair[1], reverse=True)
    # Exclude the selected movie explicitly rather than assuming it ranks first.
    movie_indices = [i for i, _score in ranked if i != position][:candidate_pool]

    candidates = movies.iloc[movie_indices][['id', 'title', 'vote_count', 'vote_average', 'year']]
    vote_counts = candidates['vote_count'].dropna().astype('int')
    # Keep the averages as floats: truncating 6.9 to 6 distorts the ranking.
    vote_averages = candidates['vote_average'].dropna().astype('float')
    C = vote_averages.mean()
    m = vote_counts.quantile(vote_quantile)

    keep = (
        (candidates['vote_count'] >= m)
        & candidates['vote_count'].notnull()
        & candidates['vote_average'].notnull()
    )
    # Work on a copy so the column assignments below never write into a view.
    qualified_movies = candidates[keep].copy()
    qualified_movies['vote_count'] = qualified_movies['vote_count'].astype('int')
    qualified_movies['vote_average'] = qualified_movies['vote_average'].astype('float')

    # Vectorised weighted rating; unlike a row-wise apply it also copes with
    # an empty frame.
    v = qualified_movies['vote_count']
    R = qualified_movies['vote_average']
    qualified_movies['wr'] = (v / (v + m) * R) + (m / (m + v) * C)

    return qualified_movies.sort_values('wr', ascending=False).head(max(top_n, 0))