Spaces:
Runtime error
Runtime error
tobiasaurer
committed on
Commit
•
c0503dc
1
Parent(s):
990dd96
updated files to newest version
Browse files
pages/1 - Popularity-Based Recommender.py
CHANGED
@@ -18,23 +18,11 @@ movies.loc[lambda df: df["title"].str.contains(", A", regex=True), 'title'] = mo
|
|
18 |
|
19 |
# INSTRUCTIONS:
|
20 |
st.title("Popularity-Based Recommender")
|
21 |
-
st.write("""
|
22 |
-
### Instructions
|
23 |
-
Move the slider to the desired number of recommendations you wish to receive.
|
24 |
-
|
25 |
-
__Optional__: You can receive links for popular streaming services for each recommendation (if available) by selecting in your countrycode.
|
26 |
-
Leave this field empty if you don't want to get streaming links.
|
27 |
-
|
28 |
-
__Optional__: You can narrow down the recommendations by picking one or several genre(s).
|
29 |
-
However, the more genres you choose, the fewer movies will be recommended.
|
30 |
-
|
31 |
-
Afterwards, simply click the "Get Recommendations" button to receive recommendations based on the most popular movies in our database.
|
32 |
-
""")
|
33 |
|
34 |
# FUNCTIONS:
|
35 |
|
36 |
def get_popular_recommendations(n, genres):
|
37 |
-
|
38 |
ratings
|
39 |
.groupby('movieId')
|
40 |
.agg(avg_rating = ('rating', 'mean'), num_ratings = ('rating', 'count'))
|
@@ -44,7 +32,9 @@ def get_popular_recommendations(n, genres):
|
|
44 |
.sort_values('combined_rating', ascending=False)
|
45 |
.head(n)
|
46 |
[['title', 'avg_rating', 'genres']]
|
|
|
47 |
)
|
|
|
48 |
|
49 |
def get_popular_recommendations_streaming(n, genres, country, url, headers):
|
50 |
recommendations = (
|
@@ -81,7 +71,9 @@ def get_popular_recommendations_streaming(n, genres, country, url, headers):
|
|
81 |
for streaming_service in streaming_info['streamingInfo']:
|
82 |
recommendations_ids.loc[recommendations_ids['imdbId'] == id, 'Streaming Availability'] += f"{streaming_service}: {streaming_info['streamingInfo'][streaming_service][country]['link']} \n"
|
83 |
|
84 |
-
|
|
|
|
|
85 |
|
86 |
def transform_genre_to_regex(genres):
|
87 |
regex = ""
|
@@ -90,13 +82,25 @@ def transform_genre_to_regex(genres):
|
|
90 |
return regex
|
91 |
|
92 |
# USER INPUT:
|
|
|
|
|
|
|
|
|
93 |
number_of_recommendations = st.slider("Number of recommendations", 1, 10, 5)
|
94 |
|
|
|
|
|
|
|
|
|
95 |
genre_list = set([inner for outer in movies.genres.str.split('|') for inner in outer])
|
96 |
genres = st.multiselect('Optional: Select one or more genres', genre_list, default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
|
97 |
genres_regex = transform_genre_to_regex(genres)
|
98 |
|
99 |
-
|
|
|
|
|
|
|
|
|
100 |
|
101 |
# API INFORMATION:
|
102 |
url = "https://streaming-availability.p.rapidapi.com/get/basic"
|
@@ -108,7 +112,8 @@ headers = {
|
|
108 |
# EXECUTION:
|
109 |
|
110 |
if st.button("Get Recommendations"):
|
111 |
-
if streaming_country == '':
|
112 |
st.write(get_popular_recommendations(number_of_recommendations, genres_regex))
|
113 |
else:
|
|
|
114 |
st.write(get_popular_recommendations_streaming(number_of_recommendations, genres_regex, streaming_country, url, headers))
|
|
|
18 |
|
19 |
# INSTRUCTIONS:
|
20 |
st.title("Popularity-Based Recommender")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
# FUNCTIONS:
|
23 |
|
24 |
def get_popular_recommendations(n, genres):
|
25 |
+
recommendations = (
|
26 |
ratings
|
27 |
.groupby('movieId')
|
28 |
.agg(avg_rating = ('rating', 'mean'), num_ratings = ('rating', 'count'))
|
|
|
32 |
.sort_values('combined_rating', ascending=False)
|
33 |
.head(n)
|
34 |
[['title', 'avg_rating', 'genres']]
|
35 |
+
.rename(columns= {'title': 'Movie Title', 'avg_rating': 'Average Rating', 'genres': 'Genres'}, inplace = True)
|
36 |
)
|
37 |
+
return recommendations
|
38 |
|
39 |
def get_popular_recommendations_streaming(n, genres, country, url, headers):
|
40 |
recommendations = (
|
|
|
71 |
for streaming_service in streaming_info['streamingInfo']:
|
72 |
recommendations_ids.loc[recommendations_ids['imdbId'] == id, 'Streaming Availability'] += f"{streaming_service}: {streaming_info['streamingInfo'][streaming_service][country]['link']} \n"
|
73 |
|
74 |
+
recommendations_ids.rename(columns= {'title': 'Movie Title', 'genres': 'Genres'}, inplace = True)
|
75 |
+
|
76 |
+
return recommendations_ids[['Movie Title', 'Genres', 'Streaming Availability']]
|
77 |
|
78 |
def transform_genre_to_regex(genres):
|
79 |
regex = ""
|
|
|
82 |
return regex
|
83 |
|
84 |
# USER INPUT:
|
85 |
+
|
86 |
+
st.write("""
|
87 |
+
Move the slider to the desired number of recommendations you wish to receive.
|
88 |
+
""")
|
89 |
number_of_recommendations = st.slider("Number of recommendations", 1, 10, 5)
|
90 |
|
91 |
+
st.write("""
|
92 |
+
__Optional__: You can narrow down the recommendations by picking one or several genre(s).
|
93 |
+
However, the more genres you choose, the fewer movies will be recommended.
|
94 |
+
""")
|
95 |
genre_list = set([inner for outer in movies.genres.str.split('|') for inner in outer])
|
96 |
genres = st.multiselect('Optional: Select one or more genres', genre_list, default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
|
97 |
genres_regex = transform_genre_to_regex(genres)
|
98 |
|
99 |
+
st.write("""
|
100 |
+
__Optional__: You can receive links for popular streaming services for each recommendation (if available) by selecting your countrycode.
|
101 |
+
Select none if you don't want to get streaming links.
|
102 |
+
""")
|
103 |
+
streaming_country = st.selectbox('Optional: Country for streaming information', ('none', 'de', 'us'))
|
104 |
|
105 |
# API INFORMATION:
|
106 |
url = "https://streaming-availability.p.rapidapi.com/get/basic"
|
|
|
112 |
# EXECUTION:
|
113 |
|
114 |
if st.button("Get Recommendations"):
|
115 |
+
if streaming_country == 'none':
|
116 |
st.write(get_popular_recommendations(number_of_recommendations, genres_regex))
|
117 |
else:
|
118 |
+
st.write("Double-click on a Streaming-Availability cell to see all options.")
|
119 |
st.write(get_popular_recommendations_streaming(number_of_recommendations, genres_regex, streaming_country, url, headers))
|
pages/2 - Similarity-Based Recommender.py
CHANGED
@@ -28,21 +28,8 @@ similarities_movies = pd.DataFrame(cosine_similarity(movie_user_matrix),
|
|
28 |
index=movie_user_matrix.index,
|
29 |
columns=movie_user_matrix.index)
|
30 |
|
31 |
-
#
|
32 |
st.title("User-Based Recommender")
|
33 |
-
st.write("""
|
34 |
-
### Instructions
|
35 |
-
Type in the title of a movie for which you would like to receive similar recommendations.
|
36 |
-
Move the slider to the desired number of recommendations you wish to receive.
|
37 |
-
|
38 |
-
__Optional__: You can receive links for popular streaming services for each recommendation (if available) by selecting in your countrycode.
|
39 |
-
Leave this field empty if you don't want to get streaming links.
|
40 |
-
|
41 |
-
__Optional__: You can narrow down the recommendations by picking one or several genre(s).
|
42 |
-
However, the more genres you choose, the fewer movies will be recommended.
|
43 |
-
|
44 |
-
Afterwards, simply click the "Get Recommendations" button to receive recommendations that are most similar to the given movie.
|
45 |
-
""")
|
46 |
|
47 |
# FUNCTIONS:
|
48 |
|
@@ -61,6 +48,8 @@ def get_similar_recommendations(movie_title, n, genres):
|
|
61 |
.head(n)
|
62 |
[['title', 'genres']]
|
63 |
)
|
|
|
|
|
64 |
|
65 |
return recommendations
|
66 |
|
@@ -99,7 +88,9 @@ def get_similar_recommendations_streaming(movie_title, n, genres, country, url,
|
|
99 |
for streaming_service in streaming_info['streamingInfo']:
|
100 |
recommendations_ids.loc[recommendations_ids['imdbId'] == id, 'Streaming Availability'] += f"{streaming_service}: {streaming_info['streamingInfo'][streaming_service][country]['link']} \n"
|
101 |
|
102 |
-
|
|
|
|
|
103 |
|
104 |
|
105 |
def transform_genre_to_regex(genres):
|
@@ -121,17 +112,33 @@ def find_movie_title(user_input):
|
|
121 |
|
122 |
return result[0][0]
|
123 |
|
|
|
124 |
# USER INPUT:
|
|
|
|
|
|
|
125 |
movie_title_raw = st.text_input('Movie Title')
|
126 |
movie_title = find_movie_title(movie_title_raw)
|
127 |
|
|
|
|
|
|
|
128 |
number_of_recommendations = st.slider("Number of recommendations", 1, 10, 5)
|
129 |
|
|
|
|
|
|
|
|
|
130 |
genre_list = set([inner for outer in movies.genres.str.split('|') for inner in outer])
|
131 |
genres = st.multiselect('Optional: Select one or more genres', genre_list, default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
|
132 |
genres_regex = transform_genre_to_regex(genres)
|
133 |
|
134 |
-
|
|
|
|
|
|
|
|
|
|
|
135 |
|
136 |
# API INFORMATION:
|
137 |
url = "https://streaming-availability.p.rapidapi.com/get/basic"
|
@@ -143,7 +150,7 @@ headers = {
|
|
143 |
|
144 |
# EXECUTION:
|
145 |
if st.button("Get Recommendations"):
|
146 |
-
if streaming_country == '':
|
147 |
st.write(get_similar_recommendations(movie_title, number_of_recommendations, genres_regex))
|
148 |
else:
|
149 |
st.write("Double-click on the Streaming-Availability column to see all links.")
|
|
|
28 |
index=movie_user_matrix.index,
|
29 |
columns=movie_user_matrix.index)
|
30 |
|
31 |
+
# TITLE:
|
32 |
st.title("User-Based Recommender")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
# FUNCTIONS:
|
35 |
|
|
|
48 |
.head(n)
|
49 |
[['title', 'genres']]
|
50 |
)
|
51 |
+
|
52 |
+
recommendations.rename(columns= {'title': 'Movie Title', 'genres': 'Genres'}, inplace = True)
|
53 |
|
54 |
return recommendations
|
55 |
|
|
|
88 |
for streaming_service in streaming_info['streamingInfo']:
|
89 |
recommendations_ids.loc[recommendations_ids['imdbId'] == id, 'Streaming Availability'] += f"{streaming_service}: {streaming_info['streamingInfo'][streaming_service][country]['link']} \n"
|
90 |
|
91 |
+
recommendations_ids.rename(columns= {'title': 'Movie Title', 'genres': 'Genres'}, inplace = True)
|
92 |
+
|
93 |
+
return recommendations_ids[['Movie Title', 'Genres', 'Streaming Availability']]
|
94 |
|
95 |
|
96 |
def transform_genre_to_regex(genres):
|
|
|
112 |
|
113 |
return result[0][0]
|
114 |
|
115 |
+
|
116 |
# USER INPUT:
|
117 |
+
st.write("""
|
118 |
+
Type in the name of your movie.
|
119 |
+
""")
|
120 |
movie_title_raw = st.text_input('Movie Title')
|
121 |
movie_title = find_movie_title(movie_title_raw)
|
122 |
|
123 |
+
st.write("""
|
124 |
+
Move the slider to the desired number of recommendations you wish to receive.
|
125 |
+
""")
|
126 |
number_of_recommendations = st.slider("Number of recommendations", 1, 10, 5)
|
127 |
|
128 |
+
st.write("""
|
129 |
+
__Optional__: You can narrow down the recommendations by picking one or several genre(s).
|
130 |
+
However, the more genres you choose, the fewer movies will be recommended.
|
131 |
+
""")
|
132 |
genre_list = set([inner for outer in movies.genres.str.split('|') for inner in outer])
|
133 |
genres = st.multiselect('Optional: Select one or more genres', genre_list, default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
|
134 |
genres_regex = transform_genre_to_regex(genres)
|
135 |
|
136 |
+
st.write("""
|
137 |
+
__Optional__: You can receive links for popular streaming services for each recommendation (if available) by selecting your countrycode.
|
138 |
+
Select none if you don't want to get streaming links.
|
139 |
+
""")
|
140 |
+
streaming_country = st.selectbox('Optional: Country for streaming information', ('none', 'de', 'us'))
|
141 |
+
|
142 |
|
143 |
# API INFORMATION:
|
144 |
url = "https://streaming-availability.p.rapidapi.com/get/basic"
|
|
|
150 |
|
151 |
# EXECUTION:
|
152 |
if st.button("Get Recommendations"):
|
153 |
+
if streaming_country == 'none':
|
154 |
st.write(get_similar_recommendations(movie_title, number_of_recommendations, genres_regex))
|
155 |
else:
|
156 |
st.write("Double-click on the Streaming-Availability column to see all links.")
|
pages/3 - User-Based Recommender.py
DELETED
@@ -1,89 +0,0 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
import pandas as pd
|
3 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
4 |
-
import re
|
5 |
-
import requests
|
6 |
-
|
7 |
-
# DATA:
|
8 |
-
movies = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/movies.csv')
|
9 |
-
ratings = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/ratings.csv')
|
10 |
-
links = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/links.csv')
|
11 |
-
|
12 |
-
# clean titles column by moving "The" and "A" to the beginning of the string
|
13 |
-
# this makes it more searchable for users
|
14 |
-
movies.loc[lambda df: df["title"].str.contains(", The", regex=True), 'title'] = 'The ' + movies['title']
|
15 |
-
movies.loc[lambda df: df["title"].str.contains(", The", regex=True), 'title'] = movies['title'].str.replace(", The", '', regex=True)
|
16 |
-
|
17 |
-
movies.loc[lambda df: df["title"].str.contains(", A", regex=True), 'title'] = 'A ' + movies['title']
|
18 |
-
movies.loc[lambda df: df["title"].str.contains(", A", regex=True), 'title'] = movies['title'].str.replace(", A", '', regex=True)
|
19 |
-
|
20 |
-
# create "database" to use for recommendations
|
21 |
-
user_item_matrix = (
|
22 |
-
ratings
|
23 |
-
.merge(movies, on='movieId')[['title', 'rating', 'userId']]
|
24 |
-
.pivot_table(index='userId', columns='title', values='rating')
|
25 |
-
.fillna(0)
|
26 |
-
)
|
27 |
-
|
28 |
-
similarities_users = pd.DataFrame(cosine_similarity(user_item_matrix),
|
29 |
-
index=user_item_matrix.index,
|
30 |
-
columns=user_item_matrix.index)
|
31 |
-
|
32 |
-
# INSTRUCTIONS:
|
33 |
-
st.title("User-Based Recommender")
|
34 |
-
st.write("""
|
35 |
-
### Instructions
|
36 |
-
Type in the user-ID you want to receive recommendations for.
|
37 |
-
Move the slider to the desired number of recommendations you wish to receive.
|
38 |
-
|
39 |
-
__Optional__: You can narrow down the recommendations by picking one or several genre(s).
|
40 |
-
However, the more genres you choose, the fewer movies will be recommended.
|
41 |
-
|
42 |
-
Afterwards, simply click the "Get Recommendations" button to receive recommendations that are most suitable for the given user.
|
43 |
-
""")
|
44 |
-
|
45 |
-
# FUNCTIONS:
|
46 |
-
|
47 |
-
def get_user_recommendations(user_id, n, genres):
    """Recommend up to ``n`` movies the given user has not rated yet.

    Ratings of all other users are averaged per movie, weighted by each
    user's similarity to ``user_id`` (read from ``similarities_users``).
    Movies are ranked by that predicted rating, filtered by genre, and the
    top ``n`` are returned.

    Args:
        user_id: Identifier of the target user; converted with ``int()``.
        n: Maximum number of rows to return.
        genres: Regular expression applied to the ``genres`` column; only
            matching movies are kept.

    Returns:
        DataFrame with the columns ``title`` and ``genres``.
    """
    user_id = int(user_id)

    # similarity of every *other* user to the target user, scaled so the
    # weights add up to 1
    is_other_user = similarities_users.index != user_id
    peer_similarities = similarities_users.loc[is_other_user, user_id]
    weights = peer_similarities / sum(peer_similarities)

    # a rating of 0 in the matrix marks a movie the target user has not rated
    not_rated_by_user = user_item_matrix.loc[user_id, :] == 0
    other_users_rows = user_item_matrix.index != user_id
    unwatched_movies = user_item_matrix.loc[other_users_rows, not_rated_by_user].T

    # weighted average of the other users' ratings, one row per movie title
    weighted_averages = pd.DataFrame(unwatched_movies.dot(weights), columns=["predicted_rating"])

    ranked = weighted_averages.sort_values("predicted_rating", ascending=False)
    with_movie_info = ranked.merge(movies, how='left', left_index=True, right_on='title')
    genre_mask = with_movie_info["genres"].str.contains(genres, regex=True)
    recommendations = with_movie_info[genre_mask].head(n)

    return recommendations[['title', 'genres']]
|
71 |
-
|
72 |
-
def transform_genre_to_regex(genres):
    """Build a regex that matches strings containing every selected genre.

    Each genre contributes one lookahead ``(?=.*<genre>)``. The lookaheads
    are concatenated, so all of them must succeed for the pattern to match,
    which makes the result a logical AND over the genres.

    Args:
        genres: Iterable of genre names, e.g. the selection returned by the
            genre multiselect widget.

    Returns:
        The concatenated lookahead pattern, or ``""`` when no genre is given
        (an empty pattern matches every string).
    """
    # Escape each name so regex metacharacters in a genre (parentheses, dots,
    # plus signs, ...) are matched literally instead of being parsed as
    # regex syntax such as capture groups.
    return "".join(f"(?=.*{re.escape(genre)})" for genre in genres)
|
77 |
-
|
78 |
-
# USER INPUT:
|
79 |
-
user_id_input = st.text_input('User-ID')
|
80 |
-
|
81 |
-
number_of_recommendations = st.slider("Number of recommendations", 1, 10, 5)
|
82 |
-
|
83 |
-
genre_list = set([inner for outer in movies.genres.str.split('|') for inner in outer])
|
84 |
-
genres = st.multiselect('Optional: Select one or more genres', genre_list, default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
|
85 |
-
genres_regex = transform_genre_to_regex(genres)
|
86 |
-
|
87 |
-
# EXECUTION:
|
88 |
-
if st.button("Get Recommendations"):
|
89 |
-
st.write(get_user_recommendations(user_id_input, number_of_recommendations, genres_regex))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|