amirakhlaghiqqq committed · 3b47f92
Parent(s): 3d83f42
Upload 3 files
- app.py +100 -0
- recomender.py +121 -0
- requirements.txt +5 -0
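Together, the three files add a Streamlit movie recommender: app.py renders the UI and fetches movie details from OMDb, recomender.py builds the TF-IDF similarity matrices and the three recommendation modes, and requirements.txt lists the Python dependencies.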
app.py
ADDED
@@ -0,0 +1,100 @@
import recomender
import streamlit as st
import requests
import pandas as pd


# Catalogue used to populate the movie picker.
df_cbf_Q = pd.read_csv('df_cbf_Q.csv', low_memory=False)
list_of_all_movies = list(df_cbf_Q['original_title'])


################################################################
def get_movie_information(movie_title):
    # Look the title up on OMDb; returns the JSON payload, or None if the lookup fails.
    params = {"apikey": "c176e26f", "t": movie_title, "plot": "full"}
    response = requests.get("http://www.omdbapi.com/", params=params)

    if response.status_code == 200:
        data = response.json()
        if data["Response"] == "False":
            print(data["Error"])
        else:
            return data
    else:
        print("Error:", response.status_code)


#######################################################################
def show_image(index):
    # Render one recommended movie; list_of_movies is filled in below once the user asks for suggestions.
    dict_movie = get_movie_information(list_of_movies[index])
    if not dict_movie:
        st.write(f"No information found for {list_of_movies[index]}.")
        return

    st.header(dict_movie["Title"])
    st.subheader(dict_movie["Year"])
    if dict_movie["Poster"] != "N/A":
        st.image(dict_movie["Poster"], use_column_width=False)
    else:
        st.write("Poster is not available!")

    st.markdown(f"**IMDB Rating:** {dict_movie['imdbRating']} / 10")
    st.markdown(f"**Director:** {dict_movie['Director']}")
    st.markdown(f"**Actors:** {dict_movie['Actors']}")
    st.markdown(f"**Writer:** {dict_movie['Writer']}")
    st.markdown(f"**Genre:** {dict_movie['Genre']}")
    st.markdown(f"**Year:** {dict_movie['Year']}")
    st.markdown(f"**Country:** {dict_movie['Country']}")
    st.markdown(f"**Language:** {dict_movie['Language']}")
    st.write(f"**Plot:** {dict_movie['Plot']}")


#####################################################################################################
st.set_page_config(page_title="Movie Info", page_icon=":movie_camera:")
st.title("Movie Recommender Engine")

system_option = st.radio("How would you like us to choose your next movie?",
                         ("Best Movies of all time", "Trending Movies", "Special for You :)"))

movies_watched = st.multiselect("What are your top three favorite movies? (at least 3)", list_of_all_movies)


###########################################################################################
suggest_button = st.button("Suggest new movies to watch!")

if suggest_button:
    if len(movies_watched) < 3:
        st.error("You need to pick at least 3 movies!")
    else:
        if system_option == "Best Movies of all time":
            list_of_movies = recomender.final_recommender_hot_picks_of_all_time(movies_watched)
        elif system_option == "Trending Movies":
            list_of_movies = recomender.final_recommender_hot_picks_now(movies_watched)
        else:
            list_of_movies = recomender.final_recommender_for_you(movies_watched)

        for i, _ in enumerate(list_of_movies):
            show_image(i)
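A side note on the OMDb lookup above: every press of the suggest button re-fetches each title, so a memoized variant can avoid repeated HTTP calls. The sketch below uses Streamlit's st.cache_data (available in recent Streamlit releases); the function name get_movie_information_cached and the timeout value are illustrative assumptions, not part of the committed file.

import requests
import streamlit as st


@st.cache_data(show_spinner=False)  # memoizes the OMDb response per title across reruns
def get_movie_information_cached(movie_title):
    params = {"apikey": "c176e26f", "t": movie_title, "plot": "full"}
    response = requests.get("http://www.omdbapi.com/", params=params, timeout=10)
    if response.status_code != 200:
        return None
    data = response.json()
    return data if data.get("Response") == "True" else None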
recomender.py
ADDED
@@ -0,0 +1,121 @@
import random

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer  # for TF-IDF
from sklearn.metrics.pairwise import linear_kernel

# Feature weights for blending the per-column cosine similarities (they sum to 1.0).
W_belongs_to_collection = 0.16
W_genres = 0.10
W_original_language = 0.01
W_title = 0.11
W_overview = 0.08
W_production_countries = 0.01
W_production_companies = 0.02
W_tagline = 0.10
W_keywords = 0.10
W_Director = 0.03
W_Writer = 0.02
W_Cast = 0.02
W_Top_Cast = 0.03
W_budget_categorized = 0.01
W_length = 0.02
W_average_vote_categorized = 0.08
W_count_vote_categorized = 0.07
W_era = 0.03

# TF-IDF model; stop_words='english' drops filler words such as 'the', 'or', 'in'.
tfidf = TfidfVectorizer(stop_words='english')

df_popular_popularity = pd.read_csv('df_popular_popularity.csv', low_memory=False)
df_popular_WR_Q = pd.read_csv('df_popular_WR_Q.csv', low_memory=False)
df_cbf_Q = pd.read_csv('df_cbf_Q.csv', low_memory=False)


# TfidfVectorizer cannot handle NaN, so missing text becomes an empty string.
df_cbf_Q['belongs_to_collection'] = df_cbf_Q['belongs_to_collection'].fillna("")
df_cbf_Q['overview'] = df_cbf_Q['overview'].fillna("")
df_cbf_Q['spoken_languages'] = df_cbf_Q['spoken_languages'].fillna("")
df_cbf_Q['tagline'] = df_cbf_Q['tagline'].fillna("")
df_cbf_Q['Director'] = df_cbf_Q['Director'].fillna("")
df_cbf_Q['Writer'] = df_cbf_Q['Writer'].fillna("")


df_cbf1 = df_cbf_Q
df_cbf2 = df_cbf_Q


# One TF-IDF matrix and one cosine-similarity matrix per feature column.
df_cbf_tfidf_belongs_to_collection = tfidf.fit_transform(df_cbf1['belongs_to_collection'])
cosine_sim_belongs_to_collection = linear_kernel(df_cbf_tfidf_belongs_to_collection, df_cbf_tfidf_belongs_to_collection)
df_cbf_tfidf_genres = tfidf.fit_transform(df_cbf1['genres'])
cosine_sim_genres = linear_kernel(df_cbf_tfidf_genres, df_cbf_tfidf_genres)
df_cbf_tfidf_original_language = tfidf.fit_transform(df_cbf1['original_language'])
cosine_sim_original_language = linear_kernel(df_cbf_tfidf_original_language, df_cbf_tfidf_original_language)
df_cbf_tfidf_title = tfidf.fit_transform(df_cbf1['title'])
cosine_sim_title = linear_kernel(df_cbf_tfidf_title, df_cbf_tfidf_title)
df_cbf_tfidf_overview = tfidf.fit_transform(df_cbf1['overview'])
cosine_sim_overview = linear_kernel(df_cbf_tfidf_overview, df_cbf_tfidf_overview)
df_cbf_tfidf_production_countries = tfidf.fit_transform(df_cbf1['production_countries'])
cosine_sim_production_countries = linear_kernel(df_cbf_tfidf_production_countries, df_cbf_tfidf_production_countries)
df_cbf_tfidf_production_companies = tfidf.fit_transform(df_cbf1['production_companies'])
cosine_sim_production_companies = linear_kernel(df_cbf_tfidf_production_companies, df_cbf_tfidf_production_companies)
df_cbf_tfidf_tagline = tfidf.fit_transform(df_cbf1['tagline'])
cosine_sim_tagline = linear_kernel(df_cbf_tfidf_tagline, df_cbf_tfidf_tagline)
df_cbf_tfidf_keywords = tfidf.fit_transform(df_cbf1['keywords'])
cosine_sim_keywords = linear_kernel(df_cbf_tfidf_keywords, df_cbf_tfidf_keywords)
df_cbf_tfidf_Director = tfidf.fit_transform(df_cbf1['Director'])
cosine_sim_Director = linear_kernel(df_cbf_tfidf_Director, df_cbf_tfidf_Director)
df_cbf_tfidf_Writer = tfidf.fit_transform(df_cbf1['Writer'])
cosine_sim_Writer = linear_kernel(df_cbf_tfidf_Writer, df_cbf_tfidf_Writer)
df_cbf_tfidf_Cast = tfidf.fit_transform(df_cbf1['Cast'])
cosine_sim_Cast = linear_kernel(df_cbf_tfidf_Cast, df_cbf_tfidf_Cast)
df_cbf_tfidf_Top_Cast = tfidf.fit_transform(df_cbf1['Top Cast'])
cosine_sim_Top_Cast = linear_kernel(df_cbf_tfidf_Top_Cast, df_cbf_tfidf_Top_Cast)
df_cbf_tfidf_budget_categorized = tfidf.fit_transform(df_cbf1['budget_categorized'])
cosine_sim_budget_categorized = linear_kernel(df_cbf_tfidf_budget_categorized, df_cbf_tfidf_budget_categorized)
df_cbf_tfidf_Length = tfidf.fit_transform(df_cbf1['Length'])
cosine_sim_Length = linear_kernel(df_cbf_tfidf_Length, df_cbf_tfidf_Length)
df_cbf_tfidf_average_vote_categorized = tfidf.fit_transform(df_cbf1['average_vote_categorized'])
cosine_sim_average_vote_categorized = linear_kernel(df_cbf_tfidf_average_vote_categorized, df_cbf_tfidf_average_vote_categorized)
df_cbf_tfidf_count_vote_categorized = tfidf.fit_transform(df_cbf1['count_vote_categorized'])
cosine_sim_count_vote_categorized = linear_kernel(df_cbf_tfidf_count_vote_categorized, df_cbf_tfidf_count_vote_categorized)
df_cbf_tfidf_era = tfidf.fit_transform(df_cbf1['era'])
cosine_sim_era = linear_kernel(df_cbf_tfidf_era, df_cbf_tfidf_era)


# Weighted sum of the per-feature similarity matrices.
cosine_sim_final = (
    W_belongs_to_collection * cosine_sim_belongs_to_collection
    + W_genres * cosine_sim_genres
    + W_original_language * cosine_sim_original_language
    + W_title * cosine_sim_title
    + W_overview * cosine_sim_overview
    + W_production_countries * cosine_sim_production_countries
    + W_production_companies * cosine_sim_production_companies
    + W_tagline * cosine_sim_tagline
    + W_keywords * cosine_sim_keywords
    + W_Director * cosine_sim_Director
    + W_Writer * cosine_sim_Writer
    + W_Cast * cosine_sim_Cast
    + W_Top_Cast * cosine_sim_Top_Cast
    + W_budget_categorized * cosine_sim_budget_categorized
    + W_length * cosine_sim_Length
    + W_average_vote_categorized * cosine_sim_average_vote_categorized
    + W_count_vote_categorized * cosine_sim_count_vote_categorized
    + W_era * cosine_sim_era
)

# Maps a title to its row index in df_cbf2 (and therefore in cosine_sim_final).
df_cbf2_indices = pd.Series(df_cbf2.index, index=df_cbf2['title'])


# Recommend the 10 currently most popular titles.
def final_recommender_hot_picks_now(Watched_movies_list):
    recommended_list = []
    for i in range(10):
        recommended_list.append(df_popular_popularity.loc[i, 'title'])
    return recommended_list


# Recommend the 10 best titles by weighted rating.
def final_recommender_hot_picks_of_all_time(Watched_movies_list):
    recommended_list = []
    for i in range(10):
        recommended_list.append(df_popular_WR_Q.loc[i, 'title'])
    return recommended_list


# Content-based recommendations from the user's three most recently listed movies.
def final_recommender_for_you(Watched_movies_list):
    recommended_list = []
    if len(Watched_movies_list) < 3:
        # Not enough history: fall back to the weighted-rating chart.
        for i in range(10):
            recommended_list.append(df_popular_WR_Q.loc[i, 'title'])
    else:
        recently_watched = Watched_movies_list[-3:]
        for i in range(len(recently_watched)):
            y = df_cbf2_indices[recently_watched[i]]
            z = list(enumerate(cosine_sim_final[y]))
            z = sorted(z, key=lambda x: x[1], reverse=True)
            z = z[1:16]  # skip the movie itself, keep the 15 most similar titles
            k = [pair[0] for pair in z]
            for j in k:
                recommended_list.append(df_cbf2.loc[j, 'title'])
        recommended_list = list(set(recommended_list))
        random.shuffle(recommended_list)
        recommended_list = recommended_list[:15]
    return recommended_list
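The core of recomender.py is the weighted blend of per-column cosine similarities. The same idea can be written more compactly by iterating over (column, weight) pairs; the sketch below is illustrative only and assumes the column names and weights match those used in df_cbf_Q above.

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel


def blended_similarity(df, weights):
    # weights maps a text column to its blend weight, e.g. {"genres": 0.10, "title": 0.11}
    tfidf = TfidfVectorizer(stop_words="english")
    total = np.zeros((len(df), len(df)))
    for column, weight in weights.items():
        matrix = tfidf.fit_transform(df[column].fillna(""))
        total += weight * linear_kernel(matrix, matrix)
    return total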
requirements.txt
ADDED
@@ -0,0 +1,5 @@
numpy
pandas
scikit-learn
requests
streamlit
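To try the Space locally, the usual Streamlit workflow should apply: install the dependencies from requirements.txt and launch app.py with streamlit run. Note that the three CSV files the scripts read (df_cbf_Q.csv, df_popular_popularity.csv, df_popular_WR_Q.csv) are not part of this commit and are assumed to sit next to app.py and recomender.py.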