amirakhlaghiqqq committed · 3b47f92
Parent(s): 3d83f42
Upload 3 files
- app.py +100 -0
- recomender.py +121 -0
- requirements.txt +5 -0
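Together, the three files add a Streamlit movie recommender: app.py renders the UI and fetches movie details from OMDb, recomender.py builds the TF-IDF similarity matrices and the three recommendation modes, and requirements.txt lists the Python dependencies.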
app.py
ADDED
@@ -0,0 +1,100 @@
import recomender
import streamlit as st
import requests
import pandas as pd


# Catalogue used to populate the movie picker.
df_cbf_Q = pd.read_csv('df_cbf_Q.csv', low_memory=False)
list_of_all_movies = list(df_cbf_Q['original_title'])


################################################################
def get_movie_information(movie_title):
    # Look the title up on OMDb; returns the JSON payload, or None if the lookup fails.
    params = {"apikey": "c176e26f", "t": movie_title, "plot": "full"}
    response = requests.get("http://www.omdbapi.com/", params=params)

    if response.status_code == 200:
        data = response.json()
        if data["Response"] == "False":
            print(data["Error"])
        else:
            return data
    else:
        print("Error:", response.status_code)


#######################################################################
def show_image(index):
    # Render one recommended movie; list_of_movies is filled in below once the user asks for suggestions.
    dict_movie = get_movie_information(list_of_movies[index])
    if not dict_movie:
        st.write(f"No information found for {list_of_movies[index]}.")
        return

    st.header(dict_movie["Title"])
    st.subheader(dict_movie["Year"])
    if dict_movie["Poster"] != "N/A":
        st.image(dict_movie["Poster"], use_column_width=False)
    else:
        st.write("Poster is not available!")

    st.markdown(f"**IMDB Rating:** {dict_movie['imdbRating']} / 10")
    st.markdown(f"**Director:** {dict_movie['Director']}")
    st.markdown(f"**Actors:** {dict_movie['Actors']}")
    st.markdown(f"**Writer:** {dict_movie['Writer']}")
    st.markdown(f"**Genre:** {dict_movie['Genre']}")
    st.markdown(f"**Year:** {dict_movie['Year']}")
    st.markdown(f"**Country:** {dict_movie['Country']}")
    st.markdown(f"**Language:** {dict_movie['Language']}")
    st.write(f"**Plot:** {dict_movie['Plot']}")


#####################################################################################################
st.set_page_config(page_title="Movie Info", page_icon=":movie_camera:")
st.title("Movie Recommender Engine")

system_option = st.radio("How would you like us to choose your next movie?",
                         ("Best Movies of all time", "Trending Movies", "Special for You :)"))

movies_watched = st.multiselect("What are your top three favorite movies? (at least 3)", list_of_all_movies)


###########################################################################################
suggest_button = st.button("Suggest new movies to watch!")

if suggest_button:
    if len(movies_watched) < 3:
        st.error("You need to pick at least 3 movies!")
    else:
        if system_option == "Best Movies of all time":
            list_of_movies = recomender.final_recommender_hot_picks_of_all_time(movies_watched)
        elif system_option == "Trending Movies":
            list_of_movies = recomender.final_recommender_hot_picks_now(movies_watched)
        else:
            list_of_movies = recomender.final_recommender_for_you(movies_watched)

        for i, _ in enumerate(list_of_movies):
            show_image(i)
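A side note on the OMDb lookup above: every press of the suggest button re-fetches each title, so a memoized variant can avoid repeated HTTP calls. The sketch below uses Streamlit's st.cache_data (available in recent Streamlit releases); the function name get_movie_information_cached and the timeout value are illustrative assumptions, not part of the committed file.

import requests
import streamlit as st


@st.cache_data(show_spinner=False)  # memoizes the OMDb response per title across reruns
def get_movie_information_cached(movie_title):
    params = {"apikey": "c176e26f", "t": movie_title, "plot": "full"}
    response = requests.get("http://www.omdbapi.com/", params=params, timeout=10)
    if response.status_code != 200:
        return None
    data = response.json()
    return data if data.get("Response") == "True" else None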
recomender.py
ADDED
@@ -0,0 +1,121 @@
import random

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer  # for TF-IDF
from sklearn.metrics.pairwise import linear_kernel

# Feature weights for blending the per-column cosine similarities (they sum to 1.0).
W_belongs_to_collection = 0.16
W_genres = 0.10
W_original_language = 0.01
W_title = 0.11
W_overview = 0.08
W_production_countries = 0.01
W_production_companies = 0.02
W_tagline = 0.10
W_keywords = 0.10
W_Director = 0.03
W_Writer = 0.02
W_Cast = 0.02
W_Top_Cast = 0.03
W_budget_categorized = 0.01
W_length = 0.02
W_average_vote_categorized = 0.08
W_count_vote_categorized = 0.07
W_era = 0.03

# TF-IDF model; stop_words='english' drops filler words such as 'the', 'or', 'in'.
tfidf = TfidfVectorizer(stop_words='english')

df_popular_popularity = pd.read_csv('df_popular_popularity.csv', low_memory=False)
df_popular_WR_Q = pd.read_csv('df_popular_WR_Q.csv', low_memory=False)
df_cbf_Q = pd.read_csv('df_cbf_Q.csv', low_memory=False)


# TfidfVectorizer cannot handle NaN, so missing text becomes an empty string.
df_cbf_Q['belongs_to_collection'] = df_cbf_Q['belongs_to_collection'].fillna("")
df_cbf_Q['overview'] = df_cbf_Q['overview'].fillna("")
df_cbf_Q['spoken_languages'] = df_cbf_Q['spoken_languages'].fillna("")
df_cbf_Q['tagline'] = df_cbf_Q['tagline'].fillna("")
df_cbf_Q['Director'] = df_cbf_Q['Director'].fillna("")
df_cbf_Q['Writer'] = df_cbf_Q['Writer'].fillna("")


df_cbf1 = df_cbf_Q
df_cbf2 = df_cbf_Q


# One TF-IDF matrix and one cosine-similarity matrix per feature column.
df_cbf_tfidf_belongs_to_collection = tfidf.fit_transform(df_cbf1['belongs_to_collection'])
cosine_sim_belongs_to_collection = linear_kernel(df_cbf_tfidf_belongs_to_collection, df_cbf_tfidf_belongs_to_collection)
df_cbf_tfidf_genres = tfidf.fit_transform(df_cbf1['genres'])
cosine_sim_genres = linear_kernel(df_cbf_tfidf_genres, df_cbf_tfidf_genres)
df_cbf_tfidf_original_language = tfidf.fit_transform(df_cbf1['original_language'])
cosine_sim_original_language = linear_kernel(df_cbf_tfidf_original_language, df_cbf_tfidf_original_language)
df_cbf_tfidf_title = tfidf.fit_transform(df_cbf1['title'])
cosine_sim_title = linear_kernel(df_cbf_tfidf_title, df_cbf_tfidf_title)
df_cbf_tfidf_overview = tfidf.fit_transform(df_cbf1['overview'])
cosine_sim_overview = linear_kernel(df_cbf_tfidf_overview, df_cbf_tfidf_overview)
df_cbf_tfidf_production_countries = tfidf.fit_transform(df_cbf1['production_countries'])
cosine_sim_production_countries = linear_kernel(df_cbf_tfidf_production_countries, df_cbf_tfidf_production_countries)
df_cbf_tfidf_production_companies = tfidf.fit_transform(df_cbf1['production_companies'])
cosine_sim_production_companies = linear_kernel(df_cbf_tfidf_production_companies, df_cbf_tfidf_production_companies)
df_cbf_tfidf_tagline = tfidf.fit_transform(df_cbf1['tagline'])
cosine_sim_tagline = linear_kernel(df_cbf_tfidf_tagline, df_cbf_tfidf_tagline)
df_cbf_tfidf_keywords = tfidf.fit_transform(df_cbf1['keywords'])
cosine_sim_keywords = linear_kernel(df_cbf_tfidf_keywords, df_cbf_tfidf_keywords)
df_cbf_tfidf_Director = tfidf.fit_transform(df_cbf1['Director'])
cosine_sim_Director = linear_kernel(df_cbf_tfidf_Director, df_cbf_tfidf_Director)
df_cbf_tfidf_Writer = tfidf.fit_transform(df_cbf1['Writer'])
cosine_sim_Writer = linear_kernel(df_cbf_tfidf_Writer, df_cbf_tfidf_Writer)
df_cbf_tfidf_Cast = tfidf.fit_transform(df_cbf1['Cast'])
cosine_sim_Cast = linear_kernel(df_cbf_tfidf_Cast, df_cbf_tfidf_Cast)
df_cbf_tfidf_Top_Cast = tfidf.fit_transform(df_cbf1['Top Cast'])
cosine_sim_Top_Cast = linear_kernel(df_cbf_tfidf_Top_Cast, df_cbf_tfidf_Top_Cast)
df_cbf_tfidf_budget_categorized = tfidf.fit_transform(df_cbf1['budget_categorized'])
cosine_sim_budget_categorized = linear_kernel(df_cbf_tfidf_budget_categorized, df_cbf_tfidf_budget_categorized)
df_cbf_tfidf_Length = tfidf.fit_transform(df_cbf1['Length'])
cosine_sim_Length = linear_kernel(df_cbf_tfidf_Length, df_cbf_tfidf_Length)
df_cbf_tfidf_average_vote_categorized = tfidf.fit_transform(df_cbf1['average_vote_categorized'])
cosine_sim_average_vote_categorized = linear_kernel(df_cbf_tfidf_average_vote_categorized, df_cbf_tfidf_average_vote_categorized)
df_cbf_tfidf_count_vote_categorized = tfidf.fit_transform(df_cbf1['count_vote_categorized'])
cosine_sim_count_vote_categorized = linear_kernel(df_cbf_tfidf_count_vote_categorized, df_cbf_tfidf_count_vote_categorized)
df_cbf_tfidf_era = tfidf.fit_transform(df_cbf1['era'])
cosine_sim_era = linear_kernel(df_cbf_tfidf_era, df_cbf_tfidf_era)


# Weighted sum of the per-feature similarity matrices.
cosine_sim_final = (
    W_belongs_to_collection * cosine_sim_belongs_to_collection
    + W_genres * cosine_sim_genres
    + W_original_language * cosine_sim_original_language
    + W_title * cosine_sim_title
    + W_overview * cosine_sim_overview
    + W_production_countries * cosine_sim_production_countries
    + W_production_companies * cosine_sim_production_companies
    + W_tagline * cosine_sim_tagline
    + W_keywords * cosine_sim_keywords
    + W_Director * cosine_sim_Director
    + W_Writer * cosine_sim_Writer
    + W_Cast * cosine_sim_Cast
    + W_Top_Cast * cosine_sim_Top_Cast
    + W_budget_categorized * cosine_sim_budget_categorized
    + W_length * cosine_sim_Length
    + W_average_vote_categorized * cosine_sim_average_vote_categorized
    + W_count_vote_categorized * cosine_sim_count_vote_categorized
    + W_era * cosine_sim_era
)

# Maps a title to its row index in df_cbf2 (and therefore in cosine_sim_final).
df_cbf2_indices = pd.Series(df_cbf2.index, index=df_cbf2['title'])


# Recommend the 10 currently most popular titles.
def final_recommender_hot_picks_now(Watched_movies_list):
    recommended_list = []
    for i in range(10):
        recommended_list.append(df_popular_popularity.loc[i, 'title'])
    return recommended_list


# Recommend the 10 best titles by weighted rating.
def final_recommender_hot_picks_of_all_time(Watched_movies_list):
    recommended_list = []
    for i in range(10):
        recommended_list.append(df_popular_WR_Q.loc[i, 'title'])
    return recommended_list


# Content-based recommendations from the user's three most recently listed movies.
def final_recommender_for_you(Watched_movies_list):
    recommended_list = []
    if len(Watched_movies_list) < 3:
        # Not enough history: fall back to the weighted-rating chart.
        for i in range(10):
            recommended_list.append(df_popular_WR_Q.loc[i, 'title'])
    else:
        recently_watched = Watched_movies_list[-3:]
        for i in range(len(recently_watched)):
            y = df_cbf2_indices[recently_watched[i]]
            z = list(enumerate(cosine_sim_final[y]))
            z = sorted(z, key=lambda x: x[1], reverse=True)
            z = z[1:16]  # skip the movie itself, keep the 15 most similar titles
            k = [pair[0] for pair in z]
            for j in k:
                recommended_list.append(df_cbf2.loc[j, 'title'])
        recommended_list = list(set(recommended_list))
        random.shuffle(recommended_list)
        recommended_list = recommended_list[:15]
    return recommended_list
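The core of recomender.py is the weighted blend of per-column cosine similarities. The same idea can be written more compactly by iterating over (column, weight) pairs; the sketch below is illustrative only and assumes the column names and weights match those used in df_cbf_Q above.

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel


def blended_similarity(df, weights):
    # weights maps a text column to its blend weight, e.g. {"genres": 0.10, "title": 0.11}
    tfidf = TfidfVectorizer(stop_words="english")
    total = np.zeros((len(df), len(df)))
    for column, weight in weights.items():
        matrix = tfidf.fit_transform(df[column].fillna(""))
        total += weight * linear_kernel(matrix, matrix)
    return total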
requirements.txt
ADDED
@@ -0,0 +1,5 @@
numpy
pandas
scikit-learn
requests
streamlit
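To try the Space locally, the usual Streamlit workflow should apply: install the dependencies from requirements.txt and launch app.py with streamlit run. Note that the three CSV files the scripts read (df_cbf_Q.csv, df_popular_popularity.csv, df_popular_WR_Q.csv) are not part of this commit and are assumed to sit next to app.py and recomender.py.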