Spaces:
Runtime error
Runtime error
import os
import re

import pandas as pd
import requests
import streamlit as st
from sklearn.metrics.pairwise import cosine_similarity
# DATA:
# Raw MovieLens-style tables, fetched from the project's GitHub repository.
movies = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/movies.csv')
ratings = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/ratings.csv')
links = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/links.csv')

# Clean the title column by moving a trailing article ("The", "A", "An") to
# the beginning of the title, e.g. "Matrix, The (1999)" -> "The Matrix (1999)".
# This makes titles more searchable for users.
#
# The article must be followed by the end of the title or by an opening
# parenthesis (year / alternative title). Matching a bare ", A" anywhere would
# also hit ", An" and mangle the title ("American in Paris, An (1951)" used to
# become "A American in Parisn (1951)").
movies['title'] = movies['title'].str.replace(
    r'^(.*), (The|An|A)(?=\s*(?:\(|$))',
    r'\2 \1',
    regex=True,
)

# Create the "database" used for recommendations: one row per movie title, one
# column per user, cells holding the rating (0 where the user has not rated
# the movie).
movie_user_matrix = (
    ratings
    .merge(movies, on='movieId')[['title', 'rating', 'userId']]
    .pivot_table(index='title', columns='userId', values='rating')
    .fillna(0)
)

# Pairwise cosine similarity between the rating rows, labelled by title on
# both axes so a movie's similarities can be looked up as a column.
similarities_movies = pd.DataFrame(
    cosine_similarity(movie_user_matrix),
    index=movie_user_matrix.index,
    columns=movie_user_matrix.index,
)
# INSTRUCTIONS:
# Page header followed by the usage notes, rendered as Markdown.
st.title("User-Based Recommender")

instructions_markdown = """
### Instructions
Type in the title of a movie for which you would like to receive similar recommendations.
Move the slider to the desired number of recommendations you wish to receive.
If you want to receive links for popular streaming services for the recommendations, type in your countrycode (popular values are "us" for the United States, and "de" for Germany)
Leave this field empty if you don't want to get links.
Afterwards, simply click the "Get Recommendations" button to receive recommendations that are most similar to the given movie.
__Optional__: You can narrow down the recommendations by picking one or several genre(s).
However, the more genres you choose, the fewer movies will be recommended.
"""
st.write(instructions_markdown)
# FUNCTIONS: | |
def get_similar_recommendations(movie_title, n, genres):
    """Return the ``n`` movies most similar to ``movie_title``.

    Args:
        movie_title: Title to look up as a column of ``similarities_movies``.
        n: Maximum number of recommendations to return.
        genres: Regular expression applied to the ``genres`` column; an empty
            string disables genre filtering.

    Returns:
        A DataFrame with the columns ``title`` and ``genres``, ordered from
        most to least similar. It is empty (but has both columns) when
        ``movie_title`` is not part of the similarity matrix, e.g. a movie
        that has no ratings.
    """
    if movie_title not in similarities_movies.columns:
        return pd.DataFrame(columns=['title', 'genres'])

    # Similarity of every *other* movie to the chosen one, best match first.
    # The raw scores are used directly: rescaling them by their sum does not
    # change the ranking and yields NaN when all similarities are zero.
    ranked = (
        similarities_movies[movie_title]
        .drop(labels=movie_title, errors='ignore')
        .rename('similarity')
        .sort_values(ascending=False)
    )

    # Attach genres. Several movieIds can share one title; keep only the first
    # row per title so a movie is never recommended twice.
    candidates = (
        ranked.to_frame()
        .merge(movies, how='left', left_index=True, right_on='title')
        .drop_duplicates(subset='title')
    )

    # Exclude genres if necessary (rows without genre information never match).
    if genres:
        genre_mask = candidates['genres'].str.contains(genres, regex=True, na=False)
        candidates = candidates[genre_mask]

    return candidates.head(n)[['title', 'genres']]
def _to_imdb_id(raw_id):
    """Format a numeric imdbId as an IMDb title id ("tt" + 7-digit zero padding)."""
    if pd.isna(raw_id):
        return None
    return f"tt{int(raw_id):07d}"


def _fetch_streaming_links(imdb_id, country, url, headers):
    """Query the streaming API for one title and return its formatted links.

    Returns one "<service>: <link> \\n" entry per service that lists a link
    for ``country``, concatenated; an empty string when the API has no usable
    answer for this title.
    """
    querystring = {"country": country, "imdb_id": imdb_id, "output_language": "en"}
    # A timeout keeps one stalled request from hanging the whole app.
    response = requests.get(url, headers=headers, params=querystring, timeout=10)
    if not response.ok:
        # e.g. the title is unknown to the API; show it without links instead
        # of failing the whole table.
        return ""
    try:
        payload = response.json()
    except ValueError:
        return ""

    services = payload.get('streamingInfo') if isinstance(payload, dict) else None
    if not isinstance(services, dict):
        return ""

    entries = []
    for service, per_country in services.items():
        offer = per_country.get(country) if isinstance(per_country, dict) else None
        if isinstance(offer, dict) and 'link' in offer:
            entries.append(f"{service}: {offer['link']} \n")
    return "".join(entries)


def get_similar_recommendations_streaming(movie_title, n, genres, country, url, headers):
    """Return the ``n`` most similar movies together with streaming links.

    Args:
        movie_title: Title to look up as a column of ``similarities_movies``.
        n: Maximum number of recommendations to return.
        genres: Regular expression applied to the ``genres`` column; an empty
            string disables genre filtering.
        country: Country code sent to the API and used to pick the links.
        url: Endpoint of the streaming-availability API.
        headers: HTTP headers (API key and host) sent with every request.

    Returns:
        A DataFrame with the columns ``title``, ``genres`` and
        ``Streaming Availability``. It is empty (but has all three columns)
        when ``movie_title`` is not part of the similarity matrix.

    Raises:
        requests.RequestException: If a request fails at the connection level
            or times out.
    """
    output_columns = ['title', 'genres', 'Streaming Availability']
    if movie_title not in similarities_movies.columns:
        return pd.DataFrame(columns=output_columns)

    # Similarity of every *other* movie to the chosen one, best match first.
    ranked = (
        similarities_movies[movie_title]
        .drop(labels=movie_title, errors='ignore')
        .rename('similarity')
        .sort_values(ascending=False)
    )

    # Attach genres and movieId; keep one row per title so a movie is never
    # recommended (and queried) twice.
    candidates = (
        ranked.to_frame()
        .merge(movies, how='left', left_index=True, right_on='title')
        .drop_duplicates(subset='title')
    )

    # Exclude genres if necessary (rows without genre information never match).
    if genres:
        genre_mask = candidates['genres'].str.contains(genres, regex=True, na=False)
        candidates = candidates[genre_mask]
    recommendations = candidates.head(n)[['title', 'genres', 'movieId']]

    # Merge with the links table to get the imdbIds needed for the API calls.
    recommendations_ids = recommendations.merge(links, how='left', on='movieId')
    imdb_ids = [_to_imdb_id(raw_id) for raw_id in recommendations_ids['imdbId']]

    # One API call per recommended movie.
    recommendations_ids['Streaming Availability'] = [
        _fetch_streaming_links(imdb_id, country, url, headers) if imdb_id else ""
        for imdb_id in imdb_ids
    ]
    return recommendations_ids[output_columns]
def transform_genre_to_regex(genres):
    """Build a regex that matches strings containing *all* given genres.

    Each genre becomes one lookahead ``(?=.*<genre>)``; chaining them requires
    every genre to appear somewhere in the string, in any order. Genre names
    are escaped so characters such as parentheses are matched literally.

    Args:
        genres: Iterable of genre names.

    Returns:
        The combined pattern, or an empty string (which matches everything)
        when ``genres`` is empty.
    """
    return "".join(f"(?=.*{re.escape(genre)})" for genre in genres)
def find_movie_title(user_input):
    """Return the first title in ``movies`` that contains ``user_input``.

    The input is treated as literal text, never as a regular expression, so
    characters like ``(`` or ``?`` are safe to type. A case-sensitive match is
    preferred; if there is none, the search is repeated ignoring case.

    Args:
        user_input: Text typed by the user. An empty string matches the first
            title.

    Returns:
        The matching title, exactly as stored in ``movies``.

    Raises:
        IndexError: If no title contains ``user_input``.
    """
    # Skip non-string entries (e.g. NaN for a missing title).
    titles = [title for title in movies.title.unique() if isinstance(title, str)]

    for title in titles:
        if user_input in title:
            return title

    needle = user_input.casefold()
    for title in titles:
        if needle in title.casefold():
            return title

    raise IndexError(f"no movie title contains {user_input!r}")
# USER INPUT:
movie_title_raw = st.text_input('Movie Title')
# Resolve the typed text to a title from the dataset. Empty or unmatched input
# leaves movie_title as None, so the script keeps running and the problem is
# reported when the button is clicked.
movie_title = None
if movie_title_raw.strip():
    try:
        movie_title = find_movie_title(movie_title_raw)
    except (IndexError, re.error):
        movie_title = None

number_of_recommendations = st.slider("Number of recommendations", 1, 10, 5)

# Every distinct genre in the dataset, sorted so the options appear in a
# stable order.
genre_list = sorted({
    genre
    for genre_string in movies.genres.dropna().str.split('|')
    for genre in genre_string
})
genres = st.multiselect('Optional: Select one or more genres', genre_list)
genres_regex = transform_genre_to_regex(genres)

streaming_country = st.text_input('Country for streaming information (e.g. "de" for Germany)').strip().lower()

# API INFORMATION:
url = "https://streaming-availability.p.rapidapi.com/get/basic"
# The RapidAPI key is read from the RAPIDAPI_KEY environment variable so that
# it never has to be committed with the code.
api_key = os.environ.get("RAPIDAPI_KEY", "")
headers = {
    "X-RapidAPI-Key": api_key,
    "X-RapidAPI-Host": "streaming-availability.p.rapidapi.com",
}

# EXECUTION:
if st.button("Get Recommendations"):
    if not movie_title_raw.strip():
        st.warning("Please enter a movie title.")
    elif movie_title is None:
        st.warning(f'No movie matching "{movie_title_raw}" was found.')
    elif streaming_country == '':
        st.write(get_similar_recommendations(movie_title, number_of_recommendations, genres_regex))
    elif not api_key:
        st.error("Streaming links are unavailable: the RAPIDAPI_KEY environment variable is not set.")
    else:
        st.write("Double-click on the Streaming-Availability column to see all links.")
        st.write(get_similar_recommendations_streaming(movie_title, number_of_recommendations, genres_regex, streaming_country, url, headers))