Spaces:
Runtime error
Runtime error
tobiasaurer
committed on
Commit
•
807a8f8
1
Parent(s):
b3cb585
formatting changes and implementation of streaming
Browse files
pages/1 - Popularity-Based Recommender.py
CHANGED
@@ -1,19 +1,34 @@
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
|
|
|
|
3 |
|
4 |
# DATA:
|
5 |
-
movies = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/recommender-
|
6 |
-
ratings = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/recommender-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
# INSTRUCTIONS:
|
9 |
st.title("Popularity-Based Recommender")
|
10 |
st.write("""
|
11 |
### Instructions
|
12 |
Move the slider to the desired number of recommendations you wish to receive.
|
13 |
-
|
|
|
|
|
14 |
|
15 |
__Optional__: You can narrow down the recommendations by picking one or several genre(s).
|
16 |
However, the more genres you choose, the fewer movies will be recommended.
|
|
|
|
|
17 |
""")
|
18 |
|
19 |
# FUNCTIONS:
|
@@ -31,6 +46,39 @@ def get_popular_recommendations(n, genres):
|
|
31 |
[['title', 'avg_rating', 'genres']]
|
32 |
)
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
def transform_genre_to_regex(genres):
|
35 |
regex = ""
|
36 |
for genre in genres:
|
@@ -44,6 +92,19 @@ genre_list = set([inner for outer in movies.genres.str.split('|') for inner in o
|
|
44 |
genres = st.multiselect('Optional: Select one or more genres', genre_list, default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
|
45 |
genres_regex = transform_genre_to_regex(genres)
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
# EXECUTION:
|
|
|
48 |
if st.button("Get Recommendations"):
|
49 |
-
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
+
import re
|
4 |
+
import requests
|
5 |
|
6 |
# DATA:
|
7 |
+
# Source tables for the recommender, read straight from the project repository.
movies = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/movies.csv')
ratings = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/ratings.csv')
links = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/links.csv')

# clean titles column by moving "The" and "A" to the beginning of the string
# this makes it more searchable for users
#
# The article is only moved when it really is a trailing article, i.e. when
# ", The" / ", A" is followed by " (" (alternative title or year) or by the end
# of the string. A plain substring test would also hit titles that merely
# contain ", A..." or ", The..." (e.g. "..., American ...") and corrupt them.
# The lazy `.*?` makes the first such article the one that is moved.
movies['title'] = movies['title'].str.replace(
    r'^(.*?), (The|A)(?= \(|$)',
    r'\2 \1',
    regex=True,
)
|
18 |
|
19 |
# INSTRUCTIONS:
|
20 |
st.title("Popularity-Based Recommender")
|
21 |
st.write("""
|
22 |
### Instructions
|
23 |
Move the slider to the desired number of recommendations you wish to receive.
|
24 |
+
|
25 |
+
__Optional__: You can receive links for popular streaming services for each recommendation (if available) by selecting in your countrycode.
|
26 |
+
Leave this field empty if you don't want to get streaming links.
|
27 |
|
28 |
__Optional__: You can narrow down the recommendations by picking one or several genre(s).
|
29 |
However, the more genres you choose, the fewer movies will be recommended.
|
30 |
+
|
31 |
+
Afterwards, simply click the "Get Recommendations" button to receive recommendations based on the most popular movies in our database.
|
32 |
""")
|
33 |
|
34 |
# FUNCTIONS:
|
|
|
46 |
[['title', 'avg_rating', 'genres']]
|
47 |
)
|
48 |
|
49 |
+
def get_popular_recommendations_streaming(n, genres, country, url, headers):
    """Return the n most popular movies together with streaming links.

    Popularity is the mean rating multiplied by the square root of the number
    of ratings. Only movies whose ``genres`` string matches the ``genres``
    regular expression are considered.

    Parameters
    ----------
    n : int
        Maximum number of recommendations to return.
    genres : str
        Regular expression applied to the ``genres`` column of ``movies``.
    country : str or iterable of str
        Country code(s) to look up streaming links for. A single string keeps
        the original output format; an empty value or empty iterable skips
        the API calls and leaves the link column empty.
    url : str
        Endpoint of the streaming availability API.
    headers : dict
        HTTP headers (API key and host) sent with every request.

    Returns
    -------
    pandas.DataFrame
        Columns ``title``, ``genres`` and ``Streaming Availability``. The
        last column holds one "<service>: <link> \\n" entry per available
        service, or an empty string when nothing was found.
    """
    # The caller may hand over a single code or the list a multiselect returns.
    if isinstance(country, str):
        countries = [country] if country else []
    else:
        countries = [code for code in (country or []) if code]

    # reset_index() turns movieId into a regular column so it survives the
    # merges below and can still be used to join the links table.
    rating_stats = (
        ratings
        .groupby('movieId')
        .agg(avg_rating=('rating', 'mean'), num_ratings=('rating', 'count'))
        .reset_index()
    )
    recommendations = (
        rating_stats
        .merge(movies, on='movieId')
        .assign(combined_rating=lambda df: df['avg_rating'] * df['num_ratings'] ** 0.5)
        [lambda df: df['genres'].str.contains(genres, regex=True)]
        .sort_values('combined_rating', ascending=False)
        .head(n)
        [['movieId', 'title', 'genres']]
    )

    # merge recommendations with links df to get imdbIds for the API calls
    recommendations_ids = recommendations.merge(
        links[['movieId', 'imdbId']], how='left', on='movieId'
    )

    # one lookup per recommended movie; the result is built per row instead of
    # appending to the frame cell by cell
    recommendations_ids['Streaming Availability'] = [
        _fetch_streaming_links(_format_imdb_id(raw_id), countries, url, headers)
        for raw_id in recommendations_ids['imdbId']
    ]

    return recommendations_ids[['title', 'genres', 'Streaming Availability']]


def _format_imdb_id(raw_id):
    """Turn a numeric imdbId into the 'tt' + zero-padded form, or None if missing."""
    if pd.isna(raw_id):
        return None
    # IMDb identifiers are padded to at least seven digits; a fixed 'tt0'
    # prefix is only right for six-digit numbers.
    return f"tt{int(raw_id):07d}"


def _fetch_streaming_links(imdb_id, countries, url, headers):
    """Query the API once per country and return the formatted link lines."""
    if imdb_id is None:
        return ""

    tag_country = len(countries) > 1
    lines = []
    for code in countries:
        querystring = {"country": code, "imdb_id": imdb_id, "output_language": "en"}
        try:
            response = requests.get(url, headers=headers, params=querystring, timeout=10)
            response.raise_for_status()
            payload = response.json()
        except (requests.RequestException, ValueError):
            # Streaming links are optional extras: a failed or non-JSON
            # response must not break the recommendation table.
            continue

        services = payload.get('streamingInfo') if isinstance(payload, dict) else None
        if not isinstance(services, dict):
            continue

        for streaming_service, per_country in services.items():
            entry = per_country.get(code) if isinstance(per_country, dict) else None
            link = entry.get('link') if isinstance(entry, dict) else None
            if not link:
                continue
            # Only label entries with the country when several were requested,
            # so the single-country output stays exactly as before.
            label = f"{streaming_service} ({code})" if tag_country else streaming_service
            lines.append(f"{label}: {link} \n")

    return "".join(lines)
|
81 |
+
|
82 |
def transform_genre_to_regex(genres):
|
83 |
regex = ""
|
84 |
for genre in genres:
|
|
|
92 |
genres = st.multiselect('Optional: Select one or more genres', genre_list, default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
|
93 |
genres_regex = transform_genre_to_regex(genres)
|
94 |
|
95 |
+
# Country picker for the optional streaming lookup. st.multiselect returns a
# list of the chosen options, which is empty when nothing is selected.
streaming_country = st.multiselect(
    'Optional: Country for streaming information',
    ['de', 'us'],
    default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False,
)

# API INFORMATION:
url = "https://streaming-availability.p.rapidapi.com/get/basic"
headers = {
    "X-RapidAPI-Key": st.secrets["api_key"],
    "X-RapidAPI-Host": "streaming-availability.p.rapidapi.com"
}

# EXECUTION:
if st.button("Get Recommendations"):
    # The selection is a list, so test for emptiness rather than comparing
    # with '' (which never matches and made the plain branch unreachable).
    if not streaming_country:
        st.write(get_popular_recommendations(number_of_recommendations, genres_regex))
    else:
        # The streaming helper looks links up for one country code at a time,
        # so pass each selected code separately instead of the whole list.
        for country_code in streaming_country:
            if len(streaming_country) > 1:
                st.write(f"Streaming country: {country_code}")
            st.write(get_popular_recommendations_streaming(number_of_recommendations, genres_regex, country_code, url, headers))
|
pages/3 - Similarity-Based Recommender.py
CHANGED
@@ -35,12 +35,13 @@ st.write("""
|
|
35 |
Type in the title of a movie for which you would like to receive similar recommendations.
|
36 |
Move the slider to the desired number of recommendations you wish to receive.
|
37 |
|
38 |
-
|
39 |
Leave this field empty if you don't want to get streaming links.
|
40 |
-
Afterwards, simply click the "Get Recommendations" button to receive recommendations that are most similar to the given movie.
|
41 |
|
42 |
__Optional__: You can narrow down the recommendations by picking one or several genre(s).
|
43 |
However, the more genres you choose, the fewer movies will be recommended.
|
|
|
|
|
44 |
""")
|
45 |
|
46 |
# FUNCTIONS:
|
@@ -130,7 +131,7 @@ genre_list = set([inner for outer in movies.genres.str.split('|') for inner in o
|
|
130 |
genres = st.multiselect('Optional: Select one or more genres', genre_list, default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
|
131 |
genres_regex = transform_genre_to_regex(genres)
|
132 |
|
133 |
-
streaming_country = st.
|
134 |
|
135 |
# API INFORMATION:
|
136 |
url = "https://streaming-availability.p.rapidapi.com/get/basic"
|
|
|
35 |
Type in the title of a movie for which you would like to receive similar recommendations.
|
36 |
Move the slider to the desired number of recommendations you wish to receive.
|
37 |
|
38 |
+
__Optional__: You can receive links for popular streaming services for each recommendation (if available) by selecting in your countrycode.
|
39 |
Leave this field empty if you don't want to get streaming links.
|
|
|
40 |
|
41 |
__Optional__: You can narrow down the recommendations by picking one or several genre(s).
|
42 |
However, the more genres you choose, the fewer movies will be recommended.
|
43 |
+
|
44 |
+
Afterwards, simply click the "Get Recommendations" button to receive recommendations that are most similar to the given movie.
|
45 |
""")
|
46 |
|
47 |
# FUNCTIONS:
|
|
|
131 |
genres = st.multiselect('Optional: Select one or more genres', genre_list, default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
|
132 |
genres_regex = transform_genre_to_regex(genres)
|
133 |
|
134 |
+
streaming_country = st.multiselect('Optional: Country for streaming information', ['de', 'us'], default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
|
135 |
|
136 |
# API INFORMATION:
|
137 |
url = "https://streaming-availability.p.rapidapi.com/get/basic"
|