Spaces:
Runtime error
Runtime error
tobiasaurer
committed on
Commit
•
bf962cf
1
Parent(s):
eb59333
add api-integration
Browse files
pages/3 - Similarity-Based Recommender.py
CHANGED
@@ -2,10 +2,20 @@ import streamlit as st
|
|
2 |
import pandas as pd
|
3 |
from sklearn.metrics.pairwise import cosine_similarity
|
4 |
import re
|
|
|
5 |
|
6 |
# DATA:
|
7 |
-
movies = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/recommender-
|
8 |
-
ratings = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/recommender-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
# create "database" to use for recommendations
|
11 |
movie_user_matrix = (
|
@@ -24,6 +34,8 @@ st.write("""
|
|
24 |
### Instructions
|
25 |
Type in the title of a movie for which you would like to receive similar recommendations.
|
26 |
Move the slider to the desired number of recommendations you wish to receive.
|
|
|
|
|
27 |
Afterwards, simply click the "Get Recommendations" button to receive recommendations that are most similar to the given movie.
|
28 |
|
29 |
__Optional__: You can narrow down the recommendations by picking one or several genre(s).
|
@@ -50,6 +62,44 @@ def get_similar_recommendations(movie_title, n, genres):
|
|
50 |
|
51 |
return recommendations
|
52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
def transform_genre_to_regex(genres):
|
54 |
regex = ""
|
55 |
for genre in genres:
|
@@ -79,6 +129,20 @@ genre_list = set([inner for outer in movies.genres.str.split('|') for inner in o
|
|
79 |
genres = st.multiselect('Optional: Select one or more genres', genre_list, default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
|
80 |
genres_regex = transform_genre_to_regex(genres)
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
# EXECUTION:
|
83 |
if st.button("Get Recommendations"):
|
84 |
-
|
|
|
|
|
|
|
|
|
|
2 |
import pandas as pd
|
3 |
from sklearn.metrics.pairwise import cosine_similarity
|
4 |
import re
|
5 |
+
import requests
|
6 |
|
7 |
# DATA:
|
8 |
+
movies = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/movies.csv')
ratings = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/ratings.csv')
links = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/links.csv')

# Clean the title column by moving a trailing article ("The", "A", "An") to the
# beginning of the title, e.g. "Matrix, The (1999)" -> "The Matrix (1999)".
# This makes titles more searchable for users.
#
# The article must be the last thing in the title apart from any trailing
# parenthesised groups (alternative title, release year). A plain substring
# search for ", A" would also hit ", An" / ", And ..." and mangle the title
# ("Inconvenient Truth, An (2006)" -> "A Inconvenient Truthn (2006)").
# Titles that do not match the pattern are left unchanged.
movies['title'] = movies['title'].str.replace(
    r'^(.*?), (The|An|A)((?:\s*\([^)]*\))*\s*)$',
    r'\2 \1\3',
    regex=True,
)
|
19 |
|
20 |
# create "database" to use for recommendations
|
21 |
movie_user_matrix = (
|
|
|
34 |
### Instructions
|
35 |
Type in the title of a movie for which you would like to receive similar recommendations.
|
36 |
Move the slider to the desired number of recommendations you wish to receive.
|
37 |
+
If you want to receive links for popular streaming services for the recommendations, type in your countrycode (popular values are "us" for the United States, and "de" for Germany)
|
38 |
+
Leave this field empty if you don't want to get links.
|
39 |
Afterwards, simply click the "Get Recommendations" button to receive recommendations that are most similar to the given movie.
|
40 |
|
41 |
__Optional__: You can narrow down the recommendations by picking one or several genre(s).
|
|
|
62 |
|
63 |
return recommendations
|
64 |
|
65 |
+
def _to_imdb_id(raw_id):
    """Turn a numeric imdbId from the links table into an IMDb title id.

    IMDb title ids are "tt" followed by the number zero-padded to at least
    seven digits (114709 -> "tt0114709", 99785 -> "tt0099785").
    Returns None when the value is missing or not numeric.
    """
    if pd.isna(raw_id):
        return None
    try:
        return f"tt{int(raw_id):07d}"
    except (TypeError, ValueError):
        return None


def _fetch_streaming_links(imdb_id, country, url, headers):
    """Return one "service: link \\n" entry per streaming service for a title.

    Makes a single GET request to `url`. Returns an empty string when the id is
    missing, the request fails, the response is not usable JSON, or no service
    lists the title for `country`, so one failed lookup does not abort the
    whole recommendation table.
    """
    if imdb_id is None:
        return ""

    querystring = {"country": country, "imdb_id": imdb_id, "output_language": "en"}
    try:
        # timeout keeps the app from hanging on an unresponsive API
        response = requests.get(url, headers=headers, params=querystring, timeout=10)
        response.raise_for_status()
        streaming_info = response.json()
    except (requests.RequestException, ValueError):
        return ""

    if not isinstance(streaming_info, dict):
        return ""

    entries = []
    services = streaming_info.get('streamingInfo') or {}
    for service_name, per_country in services.items():
        if not isinstance(per_country, dict):
            continue
        # a service may be listed without an entry for the requested country
        link = (per_country.get(country) or {}).get('link')
        if link:
            entries.append(f"{service_name}: {link} \n")
    return "".join(entries)


def get_similar_recommendations_streaming(movie_title, n, genres, country, url, headers):
    """Recommend the n movies most similar to `movie_title`, with streaming links.

    Reads the module-level `similarities_movies`, `movies` and `links` tables.

    Parameters:
        movie_title: column label in `similarities_movies` of the reference movie.
        n: maximum number of recommendations to return.
        genres: regular expression that a movie's `genres` string must match.
        country: country code sent to the API and used to pick the link
            from each service's entry in the response.
        url: endpoint of the streaming-availability API.
        headers: HTTP headers (API key / host) sent with every request.

    Returns:
        DataFrame with the columns 'title', 'genres' and 'Streaming Availability'.
        The last column holds one "service: link" entry per service and is
        empty when no information could be retrieved for that movie.

    One API request is made per recommended movie.
    """
    # select similarity for chosen movie (the movie itself is excluded),
    # scaled so that the similarities sum to 1
    other_movies = similarities_movies.loc[similarities_movies.index != movie_title, movie_title]
    similarities = pd.DataFrame(
        (other_movies / other_movies.sum()).sort_values(ascending=False)
    )

    # exclude genres if necessary and return the n movies with the highest similarity
    recommendations = (
        similarities
        .merge(movies, how='left', left_index=True, right_on='title')
        # na=False: rows without genre information never match instead of
        # breaking the boolean mask
        [lambda df: df["genres"].str.contains(genres, regex=True, na=False)]
        .head(n)
        [['title', 'genres', 'movieId']]
    )

    # merge recommendations with links df to get imdbIds for the API calls
    recommendations_ids = recommendations.merge(links, how='left', on='movieId')[['title', 'genres', 'imdbId']]

    # one API call per recommended movie to collect the available streaming links
    recommendations_ids['Streaming Availability'] = [
        _fetch_streaming_links(_to_imdb_id(raw_id), country, url, headers)
        for raw_id in recommendations_ids['imdbId']
    ]

    return recommendations_ids[['title', 'genres', 'Streaming Availability']]
|
101 |
+
|
102 |
+
|
103 |
def transform_genre_to_regex(genres):
|
104 |
regex = ""
|
105 |
for genre in genres:
|
|
|
129 |
genres = st.multiselect('Optional: Select one or more genres', genre_list, default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
|
130 |
genres_regex = transform_genre_to_regex(genres)
|
131 |
|
132 |
+
# Country codes are matched case-sensitively against the API response further
# down, so surrounding whitespace is dropped and the input is lower-cased.
streaming_country = st.text_input('Country for streaming information (e.g. "de" for Germany)').strip().lower()

# API INFORMATION:
# The RapidAPI key is a secret and must not be committed with the code.
# It is read from the RAPIDAPI_KEY environment variable (on Hugging Face Spaces,
# repository secrets are exposed to the app as environment variables).
# Without this definition `api_key` is not bound anywhere in the visible script
# and building `headers` raises a NameError.
import os

api_key = os.environ.get("RAPIDAPI_KEY", "")
url = "https://streaming-availability.p.rapidapi.com/get/basic"
headers = {
    "X-RapidAPI-Key": api_key,
    "X-RapidAPI-Host": "streaming-availability.p.rapidapi.com"
}
|
140 |
+
|
141 |
+
|
142 |
# EXECUTION:
|
143 |
# Runs once per click on the button: an empty country field yields the plain
# recommendations, anything else additionally looks up streaming links.
if st.button("Get Recommendations"):
    if streaming_country != '':
        st.write("Double-click on the Streaming-Availability column to see all links.")
        recommendations_with_links = get_similar_recommendations_streaming(
            movie_title,
            number_of_recommendations,
            genres_regex,
            streaming_country,
            url,
            headers,
        )
        st.write(recommendations_with_links)
    else:
        plain_recommendations = get_similar_recommendations(
            movie_title,
            number_of_recommendations,
            genres_regex,
        )
        st.write(plain_recommendations)
|