tobiasaurer committed on
Commit
c0503dc
1 Parent(s): 990dd96

updated files to newest version

Browse files
pages/1 - Popularity-Based Recommender.py CHANGED
@@ -18,23 +18,11 @@ movies.loc[lambda df: df["title"].str.contains(", A", regex=True), 'title'] = mo
18
 
19
  # INSTRUCTIONS:
20
  st.title("Popularity-Based Recommender")
21
- st.write("""
22
- ### Instructions
23
- Move the slider to the desired number of recommendations you wish to receive.
24
-
25
- __Optional__: You can receive links for popular streaming services for each recommendation (if available) by selecting in your countrycode.
26
- Leave this field empty if you don't want to get streaming links.
27
-
28
- __Optional__: You can narrow down the recommendations by picking one or several genre(s).
29
- However, the more genres you choose, the fewer movies will be recommended.
30
-
31
- Afterwards, simply click the "Get Recommendations" button to receive recommendations based on the most popular movies in our database.
32
- """)
33
 
34
  # FUNCTIONS:
35
 
36
  def get_popular_recommendations(n, genres):
37
- return (
38
  ratings
39
  .groupby('movieId')
40
  .agg(avg_rating = ('rating', 'mean'), num_ratings = ('rating', 'count'))
@@ -44,7 +32,9 @@ def get_popular_recommendations(n, genres):
44
  .sort_values('combined_rating', ascending=False)
45
  .head(n)
46
  [['title', 'avg_rating', 'genres']]
 
47
  )
 
48
 
49
  def get_popular_recommendations_streaming(n, genres, country, url, headers):
50
  recommendations = (
@@ -81,7 +71,9 @@ def get_popular_recommendations_streaming(n, genres, country, url, headers):
81
  for streaming_service in streaming_info['streamingInfo']:
82
  recommendations_ids.loc[recommendations_ids['imdbId'] == id, 'Streaming Availability'] += f"{streaming_service}: {streaming_info['streamingInfo'][streaming_service][country]['link']} \n"
83
 
84
- return recommendations_ids[['title', 'genres', 'Streaming Availability']]
 
 
85
 
86
  def transform_genre_to_regex(genres):
87
  regex = ""
@@ -90,13 +82,25 @@ def transform_genre_to_regex(genres):
90
  return regex
91
 
92
  # USER INPUT:
 
 
 
 
93
  number_of_recommendations = st.slider("Number of recommendations", 1, 10, 5)
94
 
 
 
 
 
95
  genre_list = set([inner for outer in movies.genres.str.split('|') for inner in outer])
96
  genres = st.multiselect('Optional: Select one or more genres', genre_list, default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
97
  genres_regex = transform_genre_to_regex(genres)
98
 
99
- streaming_country = st.selectbox('Optional: Country for streaming information', ('de', 'us'))
 
 
 
 
100
 
101
  # API INFORMATION:
102
  url = "https://streaming-availability.p.rapidapi.com/get/basic"
@@ -108,7 +112,8 @@ headers = {
108
  # EXECUTION:
109
 
110
  if st.button("Get Recommendations"):
111
- if streaming_country == '':
112
  st.write(get_popular_recommendations(number_of_recommendations, genres_regex))
113
  else:
 
114
  st.write(get_popular_recommendations_streaming(number_of_recommendations, genres_regex, streaming_country, url, headers))
 
18
 
19
  # INSTRUCTIONS:
20
  st.title("Popularity-Based Recommender")
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  # FUNCTIONS:
23
 
24
  def get_popular_recommendations(n, genres):
25
+ recommendations = (
26
  ratings
27
  .groupby('movieId')
28
  .agg(avg_rating = ('rating', 'mean'), num_ratings = ('rating', 'count'))
 
32
  .sort_values('combined_rating', ascending=False)
33
  .head(n)
34
  [['title', 'avg_rating', 'genres']]
35
+ .rename(columns= {'title': 'Movie Title', 'avg_rating': 'Average Rating', 'genres': 'Genres'}, inplace = True)
36
  )
37
+ return recommendations
38
 
39
  def get_popular_recommendations_streaming(n, genres, country, url, headers):
40
  recommendations = (
 
71
  for streaming_service in streaming_info['streamingInfo']:
72
  recommendations_ids.loc[recommendations_ids['imdbId'] == id, 'Streaming Availability'] += f"{streaming_service}: {streaming_info['streamingInfo'][streaming_service][country]['link']} \n"
73
 
74
+ recommendations_ids.rename(columns= {'title': 'Movie Title', 'genres': 'Genres'}, inplace = True)
75
+
76
+ return recommendations_ids[['Movie Title', 'Genres', 'Streaming Availability']]
77
 
78
  def transform_genre_to_regex(genres):
79
  regex = ""
 
82
  return regex
83
 
84
  # USER INPUT:
85
+
86
+ st.write("""
87
+ Move the slider to the desired number of recommendations you wish to receive.
88
+ """)
89
  number_of_recommendations = st.slider("Number of recommendations", 1, 10, 5)
90
 
91
+ st.write("""
92
+ __Optional__: You can narrow down the recommendations by picking one or several genre(s).
93
+ However, the more genres you choose, the fewer movies will be recommended.
94
+ """)
95
  genre_list = set([inner for outer in movies.genres.str.split('|') for inner in outer])
96
  genres = st.multiselect('Optional: Select one or more genres', genre_list, default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
97
  genres_regex = transform_genre_to_regex(genres)
98
 
99
+ st.write("""
100
+ __Optional__: You can receive links for popular streaming services for each recommendation (if available) by selecting your countrycode.
101
+ Select none if you don't want to get streaming links.
102
+ """)
103
+ streaming_country = st.selectbox('Optional: Country for streaming information', ('none', 'de', 'us'))
104
 
105
  # API INFORMATION:
106
  url = "https://streaming-availability.p.rapidapi.com/get/basic"
 
112
  # EXECUTION:
113
 
114
  if st.button("Get Recommendations"):
115
+ if streaming_country == 'none':
116
  st.write(get_popular_recommendations(number_of_recommendations, genres_regex))
117
  else:
118
+ st.write("Double-click on a Streaming-Availability cell to see all options.")
119
  st.write(get_popular_recommendations_streaming(number_of_recommendations, genres_regex, streaming_country, url, headers))
pages/2 - Similarity-Based Recommender.py CHANGED
@@ -28,21 +28,8 @@ similarities_movies = pd.DataFrame(cosine_similarity(movie_user_matrix),
28
  index=movie_user_matrix.index,
29
  columns=movie_user_matrix.index)
30
 
31
- # INSTRUCTIONS:
32
  st.title("User-Based Recommender")
33
- st.write("""
34
- ### Instructions
35
- Type in the title of a movie for which you would like to receive similar recommendations.
36
- Move the slider to the desired number of recommendations you wish to receive.
37
-
38
- __Optional__: You can receive links for popular streaming services for each recommendation (if available) by selecting in your countrycode.
39
- Leave this field empty if you don't want to get streaming links.
40
-
41
- __Optional__: You can narrow down the recommendations by picking one or several genre(s).
42
- However, the more genres you choose, the fewer movies will be recommended.
43
-
44
- Afterwards, simply click the "Get Recommendations" button to receive recommendations that are most similar to the given movie.
45
- """)
46
 
47
  # FUNCTIONS:
48
 
@@ -61,6 +48,8 @@ def get_similar_recommendations(movie_title, n, genres):
61
  .head(n)
62
  [['title', 'genres']]
63
  )
 
 
64
 
65
  return recommendations
66
 
@@ -99,7 +88,9 @@ def get_similar_recommendations_streaming(movie_title, n, genres, country, url,
99
  for streaming_service in streaming_info['streamingInfo']:
100
  recommendations_ids.loc[recommendations_ids['imdbId'] == id, 'Streaming Availability'] += f"{streaming_service}: {streaming_info['streamingInfo'][streaming_service][country]['link']} \n"
101
 
102
- return recommendations_ids[['title', 'genres', 'Streaming Availability']]
 
 
103
 
104
 
105
  def transform_genre_to_regex(genres):
@@ -121,17 +112,33 @@ def find_movie_title(user_input):
121
 
122
  return result[0][0]
123
 
 
124
  # USER INPUT:
 
 
 
125
  movie_title_raw = st.text_input('Movie Title')
126
  movie_title = find_movie_title(movie_title_raw)
127
 
 
 
 
128
  number_of_recommendations = st.slider("Number of recommendations", 1, 10, 5)
129
 
 
 
 
 
130
  genre_list = set([inner for outer in movies.genres.str.split('|') for inner in outer])
131
  genres = st.multiselect('Optional: Select one or more genres', genre_list, default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
132
  genres_regex = transform_genre_to_regex(genres)
133
 
134
- streaming_country = st.selectbox('Optional: Country for streaming information', ('de', 'us'))
 
 
 
 
 
135
 
136
  # API INFORMATION:
137
  url = "https://streaming-availability.p.rapidapi.com/get/basic"
@@ -143,7 +150,7 @@ headers = {
143
 
144
  # EXECUTION:
145
  if st.button("Get Recommendations"):
146
- if streaming_country == '':
147
  st.write(get_similar_recommendations(movie_title, number_of_recommendations, genres_regex))
148
  else:
149
  st.write("Double-click on the Streaming-Availability column to see all links.")
 
28
  index=movie_user_matrix.index,
29
  columns=movie_user_matrix.index)
30
 
31
+ # TITLE:
32
  st.title("User-Based Recommender")
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  # FUNCTIONS:
35
 
 
48
  .head(n)
49
  [['title', 'genres']]
50
  )
51
+
52
+ recommendations.rename(columns= {'title': 'Movie Title', 'genres': 'Genres'}, inplace = True)
53
 
54
  return recommendations
55
 
 
88
  for streaming_service in streaming_info['streamingInfo']:
89
  recommendations_ids.loc[recommendations_ids['imdbId'] == id, 'Streaming Availability'] += f"{streaming_service}: {streaming_info['streamingInfo'][streaming_service][country]['link']} \n"
90
 
91
+ recommendations_ids.rename(columns= {'title': 'Movie Title', 'genres': 'Genres'}, inplace = True)
92
+
93
+ return recommendations_ids[['Movie Title', 'Genres', 'Streaming Availability']]
94
 
95
 
96
  def transform_genre_to_regex(genres):
 
112
 
113
  return result[0][0]
114
 
115
+
116
  # USER INPUT:
117
+ st.write("""
118
+ Type in the name of your movie.
119
+ """)
120
  movie_title_raw = st.text_input('Movie Title')
121
  movie_title = find_movie_title(movie_title_raw)
122
 
123
+ st.write("""
124
+ Move the slider to the desired number of recommendations you wish to receive.
125
+ """)
126
  number_of_recommendations = st.slider("Number of recommendations", 1, 10, 5)
127
 
128
+ st.write("""
129
+ __Optional__: You can narrow down the recommendations by picking one or several genre(s).
130
+ However, the more genres you choose, the fewer movies will be recommended.
131
+ """)
132
  genre_list = set([inner for outer in movies.genres.str.split('|') for inner in outer])
133
  genres = st.multiselect('Optional: Select one or more genres', genre_list, default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
134
  genres_regex = transform_genre_to_regex(genres)
135
 
136
+ st.write("""
137
+ __Optional__: You can receive links for popular streaming services for each recommendation (if available) by selecting your countrycode.
138
+ Select none if you don't want to get streaming links.
139
+ """)
140
+ streaming_country = st.selectbox('Optional: Country for streaming information', ('none', 'de', 'us'))
141
+
142
 
143
  # API INFORMATION:
144
  url = "https://streaming-availability.p.rapidapi.com/get/basic"
 
150
 
151
  # EXECUTION:
152
  if st.button("Get Recommendations"):
153
+ if streaming_country == 'none':
154
  st.write(get_similar_recommendations(movie_title, number_of_recommendations, genres_regex))
155
  else:
156
  st.write("Double-click on the Streaming-Availability column to see all links.")
pages/3 - User-Based Recommender.py DELETED
@@ -1,89 +0,0 @@
1
- import streamlit as st
2
- import pandas as pd
3
- from sklearn.metrics.pairwise import cosine_similarity
4
- import re
5
- import requests
6
-
7
- # DATA:
8
- movies = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/movies.csv')
9
- ratings = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/ratings.csv')
10
- links = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/links.csv')
11
-
12
- # clean titles column by moving "The" and "A" to the beginning of the string
13
- # this makes it more searchable for users
14
- movies.loc[lambda df: df["title"].str.contains(", The", regex=True), 'title'] = 'The ' + movies['title']
15
- movies.loc[lambda df: df["title"].str.contains(", The", regex=True), 'title'] = movies['title'].str.replace(", The", '', regex=True)
16
-
17
- movies.loc[lambda df: df["title"].str.contains(", A", regex=True), 'title'] = 'A ' + movies['title']
18
- movies.loc[lambda df: df["title"].str.contains(", A", regex=True), 'title'] = movies['title'].str.replace(", A", '', regex=True)
19
-
20
- # create "database" to use for recommendations
21
- user_item_matrix = (
22
- ratings
23
- .merge(movies, on='movieId')[['title', 'rating', 'userId']]
24
- .pivot_table(index='userId', columns='title', values='rating')
25
- .fillna(0)
26
- )
27
-
28
- similarities_users = pd.DataFrame(cosine_similarity(user_item_matrix),
29
- index=user_item_matrix.index,
30
- columns=user_item_matrix.index)
31
-
32
- # INSTRUCTIONS:
33
- st.title("User-Based Recommender")
34
- st.write("""
35
- ### Instructions
36
- Type in the user-ID you want to receive recommendations for.
37
- Move the slider to the desired number of recommendations you wish to receive.
38
-
39
- __Optional__: You can narrow down the recommendations by picking one or several genre(s).
40
- However, the more genres you choose, the fewer movies will be recommended.
41
-
42
- Afterwards, simply click the "Get Recommendations" button to receive recommendations that are most suitable for the given user.
43
- """)
44
-
45
- # FUNCTIONS:
46
-
47
- def get_user_recommendations(user_id, n, genres):
48
-
49
- user_id = int(user_id)
50
- # calculate weights for ratings
51
- weights = similarities_users.loc[similarities_users.index != user_id, user_id] / sum(similarities_users.loc[similarities_users.index != user_id, user_id])
52
-
53
- # get unwatched movies for recommendations
54
- unwatched_movies = (
55
- user_item_matrix
56
- .loc[user_item_matrix.index != user_id, user_item_matrix.loc[user_id,:] == 0]
57
- .T
58
- )
59
-
60
- # compute weighted averages and return the n movies with the highest predicted ratings
61
- weighted_averages = pd.DataFrame(unwatched_movies.dot(weights), columns = ["predicted_rating"])
62
- recommendations = (
63
- weighted_averages
64
- .sort_values("predicted_rating", ascending=False)
65
- .merge(movies, how= 'left', left_index = True, right_on = 'title')
66
- [lambda df: df["genres"].str.contains(genres, regex=True)]
67
- .head(n)
68
- )
69
-
70
- return recommendations[['title', 'genres']]
71
-
72
- def transform_genre_to_regex(genres):
73
- regex = ""
74
- for genre in genres:
75
- regex += f"(?=.*{genre})"
76
- return regex
77
-
78
- # USER INPUT:
79
- user_id_input = st.text_input('User-ID')
80
-
81
- number_of_recommendations = st.slider("Number of recommendations", 1, 10, 5)
82
-
83
- genre_list = set([inner for outer in movies.genres.str.split('|') for inner in outer])
84
- genres = st.multiselect('Optional: Select one or more genres', genre_list, default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
85
- genres_regex = transform_genre_to_regex(genres)
86
-
87
- # EXECUTION:
88
- if st.button("Get Recommendations"):
89
- st.write(get_user_recommendations(user_id_input, number_of_recommendations, genres_regex))