tobiasaurer committed on
Commit
4b93540
1 Parent(s): 4ff1ca9

adds year-filter to functions

Browse files
pages/1 - Popularity-Based Recommender.py CHANGED
@@ -16,12 +16,16 @@ movies.loc[lambda df: df["title"].str.contains(", The", regex=True), 'title'] =
16
  movies.loc[lambda df: df["title"].str.contains(", A", regex=True), 'title'] = 'A ' + movies['title']
17
  movies.loc[lambda df: df["title"].str.contains(", A", regex=True), 'title'] = movies['title'].str.replace(", A", '', regex=True)
18
 
 
 
 
 
19
  # INSTRUCTIONS:
20
  st.title("Popularity-Based Recommender")
21
 
22
  # FUNCTIONS:
23
 
24
- def get_popular_recommendations(n, genres):
25
  recommendations = (
26
  ratings
27
  .groupby('movieId')
@@ -29,6 +33,7 @@ def get_popular_recommendations(n, genres):
29
  .merge(movies, on='movieId')
30
  .assign(combined_rating = lambda x: x['avg_rating'] * x['num_ratings']**0.5)
31
  [lambda df: df["genres"].str.contains(genres, regex=True)]
 
32
  .sort_values('combined_rating', ascending=False)
33
  .head(n)
34
  [['title', 'avg_rating', 'genres']]
@@ -36,7 +41,7 @@ def get_popular_recommendations(n, genres):
36
  )
37
  return recommendations
38
 
39
- def get_popular_recommendations_streaming(n, genres, country, url, headers):
40
  recommendations = (
41
  ratings
42
  .groupby('movieId')
@@ -44,6 +49,7 @@ def get_popular_recommendations_streaming(n, genres, country, url, headers):
44
  .merge(movies, on='movieId')
45
  .assign(combined_rating = lambda x: x['avg_rating'] * x['num_ratings']**0.5)
46
  [lambda df: df["genres"].str.contains(genres, regex=True)]
 
47
  .sort_values('combined_rating', ascending=False)
48
  .head(n)
49
  [['title', 'avg_rating', 'genres', 'movieId']]
@@ -88,6 +94,11 @@ Move the slider to the desired number of recommendations you wish to receive.
88
  """)
89
  number_of_recommendations = st.slider("Number of recommendations", 1, 10, 5)
90
 
 
 
 
 
 
91
  st.write("""
92
  __Optional__: You can narrow down the recommendations by picking one or several genre(s).
93
  However, the more genres you choose, the fewer movies will be recommended.
@@ -103,6 +114,7 @@ Select none if you don't want to get streaming links.
103
  streaming_country = st.selectbox('Optional: Country for streaming information', ('none', 'de', 'us'))
104
 
105
  # API INFORMATION:
 
106
  url = "https://streaming-availability.p.rapidapi.com/get/basic"
107
  headers = {
108
  "X-RapidAPI-Key": st.secrets["api_key"],
@@ -113,7 +125,11 @@ headers = {
113
 
114
  if st.button("Get Recommendations"):
115
  if streaming_country == 'none':
116
- st.write(get_popular_recommendations(number_of_recommendations, genres_regex))
117
  else:
118
- st.write("Double-click on a Streaming-Availability cell to see all options.")
119
- st.write(get_popular_recommendations_streaming(number_of_recommendations, genres_regex, streaming_country, url, headers))
 
 
 
 
 
16
  movies.loc[lambda df: df["title"].str.contains(", A", regex=True), 'title'] = 'A ' + movies['title']
17
  movies.loc[lambda df: df["title"].str.contains(", A", regex=True), 'title'] = movies['title'].str.replace(", A", '', regex=True)
18
 
19
# Derive a numeric `year` column from the "(YYYY)" marker in each title.
# `str.extract` captures only the four digits, so text that follows the year
# no longer ends up in the value and breaks the numeric conversion. The greedy
# leading `.*` makes the LAST parenthesised four-digit group win, which is the
# same group the previous `replace`-based pattern selected.
movies = movies.assign(
    year=lambda df_: df_['title'].str.extract(r'.*\((\d{4})\)', expand=False)
)
# Titles without a recognisable year become 0 so the column stays integer-typed;
# 0 lies below the minimum of the time-period slider (1900), so such rows are
# never returned by the year filter.
movies['year'] = pd.to_numeric(movies['year'], errors='coerce').fillna(0).astype('int')
22
+
23
  # INSTRUCTIONS:
24
  st.title("Popularity-Based Recommender")
25
 
26
  # FUNCTIONS:
27
 
28
+ def get_popular_recommendations(n, genres, time_range):
29
  recommendations = (
30
  ratings
31
  .groupby('movieId')
 
33
  .merge(movies, on='movieId')
34
  .assign(combined_rating = lambda x: x['avg_rating'] * x['num_ratings']**0.5)
35
  [lambda df: df["genres"].str.contains(genres, regex=True)]
36
+ .loc[lambda df : ((df['year'] >= time_range[0]) & ( df['year'] <= time_range[1]))]
37
  .sort_values('combined_rating', ascending=False)
38
  .head(n)
39
  [['title', 'avg_rating', 'genres']]
 
41
  )
42
  return recommendations
43
 
44
+ def get_popular_recommendations_streaming(n, genres, time_range, country, url, headers):
45
  recommendations = (
46
  ratings
47
  .groupby('movieId')
 
49
  .merge(movies, on='movieId')
50
  .assign(combined_rating = lambda x: x['avg_rating'] * x['num_ratings']**0.5)
51
  [lambda df: df["genres"].str.contains(genres, regex=True)]
52
+ .loc[lambda df : ((df['year'] >= time_range[0]) & ( df['year'] <= time_range[1]))]
53
  .sort_values('combined_rating', ascending=False)
54
  .head(n)
55
  [['title', 'avg_rating', 'genres', 'movieId']]
 
94
  """)
95
  number_of_recommendations = st.slider("Number of recommendations", 1, 10, 5)
96
 
97
+ st.write("""
98
+ Move the sliders to choose a timeperiod for your recommendations.
99
+ """)
100
+ time_range = st.slider('Time-period:', min_value=1900, max_value=2018, value=(1900, 2018), step=1)
101
+
102
  st.write("""
103
  __Optional__: You can narrow down the recommendations by picking one or several genre(s).
104
  However, the more genres you choose, the fewer movies will be recommended.
 
114
  streaming_country = st.selectbox('Optional: Country for streaming information', ('none', 'de', 'us'))
115
 
116
  # API INFORMATION:
117
+ # Streaming availability
118
  url = "https://streaming-availability.p.rapidapi.com/get/basic"
119
  headers = {
120
  "X-RapidAPI-Key": st.secrets["api_key"],
 
125
 
126
  if st.button("Get Recommendations"):
127
  if streaming_country == 'none':
128
+ st.write(get_popular_recommendations(number_of_recommendations, genres_regex, time_range))
129
  else:
130
+ try:
131
+ recommendations = get_popular_recommendations_streaming(number_of_recommendations, genres_regex, time_range, streaming_country, url, headers)
132
+ st.write("Double-click on a Streaming-Availability cell to see all options.", recommendations)
133
+ except:
134
+ recommendations = get_popular_recommendations(number_of_recommendations, genres_regex, time_range)
135
+ st.write('Error: Streaming information could not be gathered. Providing output without streaming availability instead.', recommendations)
pages/2 - Similarity-Based Recommender.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
3
  from sklearn.metrics.pairwise import cosine_similarity
4
  import re
5
  import requests
 
6
 
7
  # DATA:
8
  movies = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/movies.csv')
@@ -17,6 +18,10 @@ movies.loc[lambda df: df["title"].str.contains(", The", regex=True), 'title'] =
17
  movies.loc[lambda df: df["title"].str.contains(", A", regex=True), 'title'] = 'A ' + movies['title']
18
  movies.loc[lambda df: df["title"].str.contains(", A", regex=True), 'title'] = movies['title'].str.replace(", A", '', regex=True)
19
 
 
 
 
 
20
  # create "database" to use for recommendations
21
  movie_user_matrix = (
22
  ratings
@@ -33,7 +38,7 @@ st.title("User-Based Recommender")
33
 
34
  # FUNCTIONS:
35
 
36
- def get_similar_recommendations(movie_title, n, genres):
37
 
38
  # select similarity for chosen movie
39
  similarities = pd.DataFrame(
@@ -45,6 +50,7 @@ def get_similar_recommendations(movie_title, n, genres):
45
  similarities
46
  .merge(movies, how= 'left', left_index = True, right_on = 'title')
47
  [lambda df: df["genres"].str.contains(genres, regex=True)]
 
48
  .head(n)
49
  [['title', 'genres']]
50
  )
@@ -53,7 +59,7 @@ def get_similar_recommendations(movie_title, n, genres):
53
 
54
  return recommendations
55
 
56
- def get_similar_recommendations_streaming(movie_title, n, genres, country, url, headers):
57
 
58
  # select similarity for chosen movie
59
  similarities = pd.DataFrame(
@@ -65,6 +71,7 @@ def get_similar_recommendations_streaming(movie_title, n, genres, country, url,
65
  similarities
66
  .merge(movies, how= 'left', left_index = True, right_on = 'title')
67
  [lambda df: df["genres"].str.contains(genres, regex=True)]
 
68
  .head(n)
69
  [['title', 'genres', 'movieId']]
70
  )
@@ -125,6 +132,11 @@ Move the slider to the desired number of recommendations you wish to receive.
125
  """)
126
  number_of_recommendations = st.slider("Number of recommendations", 1, 10, 5)
127
 
 
 
 
 
 
128
  st.write("""
129
  __Optional__: You can narrow down the recommendations by picking one or several genre(s).
130
  However, the more genres you choose, the fewer movies will be recommended.
@@ -153,5 +165,9 @@ if st.button("Get Recommendations"):
153
  if streaming_country == 'none':
154
  st.write(get_similar_recommendations(movie_title, number_of_recommendations, genres_regex))
155
  else:
156
- st.write("Double-click on the Streaming-Availability column to see all links.")
157
- st.write(get_similar_recommendations_streaming(movie_title, number_of_recommendations, genres_regex, streaming_country, url, headers))
 
 
 
 
 
3
  from sklearn.metrics.pairwise import cosine_similarity
4
  import re
5
  import requests
6
+ import api_keys
7
 
8
  # DATA:
9
  movies = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/movies.csv')
 
18
  movies.loc[lambda df: df["title"].str.contains(", A", regex=True), 'title'] = 'A ' + movies['title']
19
  movies.loc[lambda df: df["title"].str.contains(", A", regex=True), 'title'] = movies['title'].str.replace(", A", '', regex=True)
20
 
21
# Derive a numeric `year` column from the "(YYYY)" marker in each title.
# `str.extract` captures only the four digits, so text that follows the year
# no longer ends up in the value and breaks the numeric conversion. The greedy
# leading `.*` makes the LAST parenthesised four-digit group win, which is the
# same group the previous `replace`-based pattern selected.
movies = movies.assign(
    year=lambda df_: df_['title'].str.extract(r'.*\((\d{4})\)', expand=False)
)
# Titles without a recognisable year become 0 so the column stays integer-typed;
# 0 lies below the minimum of the time-period slider (1900), so such rows are
# never returned by the year filter.
movies['year'] = pd.to_numeric(movies['year'], errors='coerce').fillna(0).astype('int')
24
+
25
  # create "database" to use for recommendations
26
  movie_user_matrix = (
27
  ratings
 
38
 
39
  # FUNCTIONS:
40
 
41
+ def get_similar_recommendations(movie_title, n, genres, time_range):
42
 
43
  # select similarity for chosen movie
44
  similarities = pd.DataFrame(
 
50
  similarities
51
  .merge(movies, how= 'left', left_index = True, right_on = 'title')
52
  [lambda df: df["genres"].str.contains(genres, regex=True)]
53
+ .loc[lambda df : ((df['year'] >= time_range[0]) & ( df['year'] <= time_range[1]))]
54
  .head(n)
55
  [['title', 'genres']]
56
  )
 
59
 
60
  return recommendations
61
 
62
+ def get_similar_recommendations_streaming(movie_title, n, genres, time_range, country, url, headers):
63
 
64
  # select similarity for chosen movie
65
  similarities = pd.DataFrame(
 
71
  similarities
72
  .merge(movies, how= 'left', left_index = True, right_on = 'title')
73
  [lambda df: df["genres"].str.contains(genres, regex=True)]
74
+ .loc[lambda df : ((df['year'] >= time_range[0]) & ( df['year'] <= time_range[1]))]
75
  .head(n)
76
  [['title', 'genres', 'movieId']]
77
  )
 
132
  """)
133
  number_of_recommendations = st.slider("Number of recommendations", 1, 10, 5)
134
 
135
+ st.write("""
136
+ Move the sliders to choose a timeperiod for your recommendations.
137
+ """)
138
+ time_range = st.slider('Time-period:', min_value=1900, max_value=2018, value=(1900, 2018), step=1)
139
+
140
  st.write("""
141
  __Optional__: You can narrow down the recommendations by picking one or several genre(s).
142
  However, the more genres you choose, the fewer movies will be recommended.
 
165
  if streaming_country == 'none':
166
  st.write(get_similar_recommendations(movie_title, number_of_recommendations, genres_regex))
167
  else:
168
+ try:
169
+ recommendations = get_similar_recommendations_streaming(movie_title, number_of_recommendations, genres_regex, streaming_country, url, headers)
170
+ st.write("Double-click on a Streaming-Availability cell to see all options.", recommendations)
171
+ except:
172
+ recommendations = get_similar_recommendations(movie_title, number_of_recommendations, genres_regex)
173
+ st.write('Error: Streaming information could not be gathered. Providing output without streaming availability instead.', recommendations)