amirakhlaghiqqq committed on
Commit
3b47f92
·
1 Parent(s): 3d83f42

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +100 -0
  2. recomender.py +121 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import recomender
2
+ import streamlit as st
3
+ import requests
4
+ import pandas as pd
5
+
6
+
7
+
8
# Movie catalogue used to populate the "favorite movies" multiselect.
df_cbf_Q = pd.read_csv('df_cbf_Q.csv', low_memory=False)

# Offer the 'title' column rather than 'original_title': recomender.py builds
# its title -> row lookup from 'title', so a selection whose original title
# differs from its title could not be found there. Duplicates are dropped so
# that every option in the widget is distinct.
list_of_all_movies = df_cbf_Q['title'].drop_duplicates().tolist()
10
+ ################################################################
11
def get_movie_information(movie_title):
    """Fetch the OMDb record for a movie title.

    Args:
        movie_title: Title sent to OMDb as the ``t`` query parameter.

    Returns:
        The decoded JSON response as a dict when the lookup succeeds, or
        ``None`` when the request fails, the server answers with a non-200
        status, the body is not valid JSON, or OMDb reports
        ``"Response": "False"``. The reason is printed in every failure case.
    """
    import os  # local import: only needed for the optional key override

    # NOTE(security): the fallback is the key that was hard-coded here; set
    # the OMDB_API_KEY environment variable to use a private key instead.
    api_key = os.environ.get("OMDB_API_KEY", "c176e26f")
    params = {"apikey": api_key, "t": movie_title, "plot": "full"}

    try:
        # HTTPS keeps the API key out of clear text; the timeout stops a
        # stalled connection from hanging the page forever.
        response = requests.get(
            "https://www.omdbapi.com/", params=params, timeout=10
        )
    except requests.RequestException as error:
        print("Error:", error)
        return None

    if response.status_code != 200:
        print("Error:", response.status_code)
        return None

    try:
        data = response.json()
    except ValueError as error:
        print("Error:", error)
        return None

    if data.get("Response") == "False":
        print(data.get("Error"))
        return None
    return data
24
+
25
+
26
+
27
+
28
+ #######################################################################
29
def show_image(index):
    """Render one recommended movie (poster and details) on the page.

    Args:
        index: Position of the movie in the module-level ``list_of_movies``
            list, which the page script assigns before calling this function.

    The movie is looked up with ``get_movie_information``. When that lookup
    returns ``None`` a warning is shown instead of raising, so one missing
    title does not abort the rest of the recommendations.
    """
    movie_title = list_of_movies[index]
    dict_movie = get_movie_information(movie_title)
    if dict_movie is None:
        st.warning(f"No information available for '{movie_title}'.")
        return

    def field(key):
        # Absent keys are shown as "N/A" instead of raising KeyError.
        return dict_movie.get(key, "N/A")

    st.header(field("Title"))
    st.subheader(field("Year"))

    poster = field("Poster")
    if poster != "N/A":
        st.image(poster, use_column_width=False)
    else:
        st.write("Poster is not available!")

    st.markdown(f"**IMDB Rating:** {field('imdbRating')} / 10")
    for label, key in (
        ("Director", "Director"),
        ("Actors", "Actors"),
        ("Writer", "Writer"),
        ("Genre", "Genre"),
        ("Year", "Year"),
        ("Country", "Country"),
        ("Language", "Language"),
    ):
        st.markdown(f"**{label}:** {field(key)}")
    st.write(f"**Plot:** {field('Plot')}")
47
+
48
+
49
+ #####################################################################################################
50
+
51
+
52
+
53
# ---- Page layout ----------------------------------------------------------
# set_page_config is the first Streamlit call in this script.
st.set_page_config(page_title="Movie Info", page_icon=":movie_camera:")
st.title("Movie Recommender Engine")

# Which recommender to use.
system_option = st.radio(
    " How would you like us to choose your next movie?",
    ("Best Movies of all time", "Trend Movies", "Special for You:)"),
)

# Movies the user has already seen and liked.
movies_watched = st.multiselect(
    "What are your top three fovorite movies? (At least 3 movies)",
    list_of_all_movies,
)

suggest_button = st.button("Suggests me new movies to watch! ")

# ---- Button handler -------------------------------------------------------
if suggest_button and len(movies_watched) < 3:
    st.error(" You need to mention 3 movies!")
elif suggest_button:
    # Map each radio label to its recommender; any other label falls through
    # to the personalised recommender.
    recommenders_by_option = {
        "Best Movies of all time": recomender.final_recommender_hot_picks_of_all_time,
        "Trend Movies": recomender.final_recommender_hot_picks_now,
    }
    chosen_recommender = recommenders_by_option.get(
        system_option, recomender.final_recommender_for_you
    )
    # show_image reads this module-level list by position.
    list_of_movies = chosen_recommender(movies_watched)

    for position in range(len(list_of_movies)):
        show_image(position)
88
+
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
recomender.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ import random
4
+
5
+ from sklearn.feature_extraction.text import TfidfVectorizer #for TF-IDF
6
+ from sklearn.metrics.pairwise import linear_kernel
7
# Weight of each feature in the combined similarity score.
# The eighteen weights below add up to 1.00.
W_belongs_to_collection = 0.16
W_genres = 0.10
W_original_language = 0.01
W_title = 0.11
W_overview = 0.08
W_production_countries = 0.01
W_production_companies = 0.02
W_tagline = 0.10
W_keywords = 0.10
W_Director = 0.03
W_Writer = 0.02
W_Cast = 0.02
W_Top_Cast = 0.03
W_budget_categorized = 0.01
W_length = 0.02
W_average_vote_categorized = 0.08
W_count_vote_categorized = 0.07
W_era = 0.03

# TF-IDF model that drops English stop words such as 'the', 'or', 'in'.
tfidf = TfidfVectorizer(stop_words='english')

df_popular_popularity = pd.read_csv('df_popular_popularity.csv', low_memory=False)
df_popular_WR_Q = pd.read_csv('df_popular_WR_Q.csv', low_memory=False)
df_cbf_Q = pd.read_csv('df_cbf_Q.csv', low_memory=False)

# Replace missing text with empty strings in these columns.
for _column in (
    'belongs_to_collection',
    'overview',
    'spoken_languages',
    'tagline',
    'Director',
    'Writer',
):
    df_cbf_Q[_column] = df_cbf_Q[_column].fillna("")

# Both names refer to the same DataFrame object as df_cbf_Q (no copy is made).
df_cbf1 = df_cbf_Q
df_cbf2 = df_cbf_Q

# (column, weight) pairs, in the order in which they are summed.
_FEATURE_WEIGHTS = (
    ('belongs_to_collection', W_belongs_to_collection),
    ('genres', W_genres),
    ('original_language', W_original_language),
    ('title', W_title),
    ('overview', W_overview),
    ('production_countries', W_production_countries),
    ('production_companies', W_production_companies),
    ('tagline', W_tagline),
    ('keywords', W_keywords),
    ('Director', W_Director),
    ('Writer', W_Writer),
    ('Cast', W_Cast),
    ('Top Cast', W_Top_Cast),
    ('budget_categorized', W_budget_categorized),
    ('Length', W_length),
    ('average_vote_categorized', W_average_vote_categorized),
    ('count_vote_categorized', W_count_vote_categorized),
    ('era', W_era),
)


def _weighted_similarity(frame, feature_weights):
    """Return the weighted sum of per-column TF-IDF similarity matrices.

    For every ``(column, weight)`` pair the column is vectorised with the
    module-level ``tfidf`` model, compared against itself with
    ``linear_kernel``, scaled by ``weight`` and added to a running total.
    Only the running total and the matrix currently being added are alive
    at any time, instead of one matrix per feature.
    """
    total = None
    for column, weight in feature_weights:
        vectors = tfidf.fit_transform(frame[column])
        similarity = linear_kernel(vectors, vectors)
        similarity *= weight  # scale in place to avoid another full-size copy
        if total is None:
            total = similarity
        else:
            total += similarity
    return total


# Combined movie-to-movie similarity: row i holds the scores of movie i
# against every movie in df_cbf1.
cosin_sim_final = _weighted_similarity(df_cbf1, _FEATURE_WEIGHTS)

# Lookup from movie title to its row label in df_cbf2.
df_cbf2_indices = pd.Series(df_cbf2.index, index=df_cbf2['title'])
85
+
86
+
87
+
88
def final_recommender_hot_picks_now(Watched_movies_list):
    """Return the first ten titles of ``df_popular_popularity``.

    Args:
        Watched_movies_list: Not used by this recommender; accepted so that
            all three recommenders can be called the same way.

    Returns:
        A list with the ``title`` values of the first ten rows, or of every
        row when the table has fewer than ten (instead of raising KeyError).
    """
    return df_popular_popularity['title'].head(10).tolist()
93
+
94
+ #recommend based on weighted ratings
95
def final_recommender_hot_picks_of_all_time(Watched_movies_list):
    """Return the first ten titles of ``df_popular_WR_Q``.

    Args:
        Watched_movies_list: Not used by this recommender; accepted so that
            all three recommenders can be called the same way.

    Returns:
        A list with the ``title`` values of the first ten rows, or of every
        row when the table has fewer than ten (instead of raising KeyError).
    """
    return df_popular_WR_Q['title'].head(10).tolist()
100
+
101
+ #recommend based on content based
102
def final_recommender_for_you(Watched_movies_list):
    """Recommend movies similar to the last three watched titles.

    Args:
        Watched_movies_list: Sequence of movie titles. Only the last three
            are used as seeds; all of them are excluded from the result.

    Returns:
        A shuffled list of at most 15 titles. For each seed, the 15 movies
        with the highest ``cosin_sim_final`` score (the seed itself excluded)
        are collected, then merged without duplicates. When fewer than three
        titles are given, or no seed yields a recommendation, the first ten
        titles of ``df_popular_WR_Q`` are returned instead.
    """
    def popular_fallback():
        return df_popular_WR_Q['title'].head(10).tolist()

    if len(Watched_movies_list) < 3:
        return popular_fallback()

    already_watched = set(Watched_movies_list)
    candidates = set()
    for seed_title in Watched_movies_list[-3:]:
        matches = df_cbf2_indices.get(seed_title)
        if matches is None:
            # Title is not in the lookup; skip it rather than raise KeyError.
            continue
        # A title that occurs more than once yields several rows; use the first.
        row = int(np.atleast_1d(matches)[0])

        scores = np.asarray(cosin_sim_final[row])
        # Stable descending order: equal scores keep their original order.
        ranked = np.argsort(-scores, kind="stable")
        # Drop the seed by position instead of assuming it is ranked first.
        neighbours = ranked[ranked != row][:15]
        candidates.update(df_cbf2.loc[j, 'title'] for j in neighbours)

    # Never recommend something the user has already watched.
    candidates -= already_watched
    if not candidates:
        return popular_fallback()

    # Sort first so the shuffle is reproducible for a given random seed.
    recommended_list = sorted(candidates)
    random.shuffle(recommended_list)
    return recommended_list[:15]
121
+
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ numpy
2
+ pandas
3
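+ streamlit  # replaces "random", a standard-library module that cannot be pip-installed; app.py imports streamlit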
4
+ scikit-learn  # "sklearn" is a deprecated PyPI alias; the package is published as scikit-learn
5
+ requests